/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifndef _WIN32
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif
#include "hw/qdev.h"
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "hw/xen/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"

#include "qemu/range.h"

//#define DEBUG_SUBPAGE
#if !defined(CONFIG_USER_ONLY)
static bool in_migration;

/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUState *, current_cpu);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;
#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
    uint32_t skip : 6;
    /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
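/* For example, with ADDR_SPACE_BITS == 64 and 4 KiB target pages
 * (TARGET_PAGE_BITS == 12), this evaluates to ((64 - 12 - 1) / 9) + 1 == 6
 * levels of 512-entry tables, enough to cover the whole address space.
 */
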
typedef PhysPageEntry Node[P_L2_SIZE];

typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    struct rcu_head rcu;

    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;
#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map)
{
    unsigned i;
    uint32_t ret;

    ret = map->nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);
    for (i = 0; i < P_L2_SIZE; ++i) {
        map->nodes[ret][i].skip = 1;
        map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

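/* Descend the radix tree from @lp, allocating intermediate nodes as needed,
 * and point the leaves covering pages [*index, *index + *nb) at the
 * phys_sections entry @leaf.
 */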
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map);
        p = map->nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < P_L2_SIZE; i++) {
                p[i].skip = 0;
                p[i].ptr = PHYS_SECTION_UNASSIGNED;
            }
        }
    } else {
        p = map->nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}

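/* Look up the MemoryRegionSection covering @addr in the radix tree rooted at
 * @lp, returning the unassigned section if no mapping exists.
 */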
static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (sections[lp.ptr].size.hi ||
        range_covers_byte(sections[lp.ptr].offset_within_address_space,
                          sections[lp.ptr].size.lo, addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}

/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    diff = int128_sub(section->mr->size, int128_make64(addr));
    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    return section;
}

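/* Return true if an access to @mr can use the host memory directly:
 * writable RAM, read-only RAM for reads, or a ROM device for reads.
 */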
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
{
    if (memory_region_is_ram(mr)) {
        return !(is_write && mr->readonly);
    }
    if (memory_region_is_romd(mr)) {
        return !is_write;
    }

    return false;
}

MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;
    hwaddr len = *plen;

    rcu_read_lock();
    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        len = MIN(page, len);
    }

    *plen = len;
    *xlat = addr;
    rcu_read_unlock();
    return mr;
}

/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    section = address_space_translate_internal(cpu->memory_dispatch,
                                               addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
#endif
}

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu, 1);

    return 0;
}

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

    return 0;
}

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}

static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (VMStateSubsection[]) {
        {
            .vmsd = &vmstate_cpu_common_exception_index,
            .needed = cpu_common_exception_index_needed,
        } , {
            /* empty */
        }
    }
};

#endif

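/* Return the CPUState whose cpu_index matches @index, or NULL if there is
 * no such CPU.
 */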
CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
{
    /* We only support one address space per cpu at the moment.  */
    assert(cpu->as == as);

    if (cpu->tcg_as_listener) {
        memory_listener_unregister(cpu->tcg_as_listener);
    } else {
        cpu->tcg_as_listener = g_new0(MemoryListener, 1);
    }
    cpu->tcg_as_listener->commit = tcg_commit;
    memory_listener_register(cpu->tcg_as_listener, as);
}
#endif

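/* Register a newly created CPU: assign it the next free cpu_index, add it to
 * the global cpus list and hook up its VM state for migration.
 */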
void cpu_exec_init(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUState *some_cpu;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    cpu_index = 0;
    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    cpu->cpu_index = cpu_index;
    cpu->numa_node = 0;
    QTAILQ_INIT(&cpu->breakpoints);
    QTAILQ_INIT(&cpu->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->as = &address_space_memory;
    cpu->thread_id = qemu_get_thread_id();
    cpu_reload_memory_map(cpu);
#endif
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
    }
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
    assert(cc->vmsd == NULL);
    assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
#endif
    if (cc->vmsd != NULL) {
        vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
    }
}

#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
{
    hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
    if (phys != -1) {
        tb_invalidate_phys_addr(cpu->as,
                                phys | (pc & ~TARGET_PAGE_MASK));
    }
}
#endif

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
}

int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}

int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    CPUWatchpoint *wp;

    /* forbid ranges which are empty or run off the end of the address space */
    if (len == 0 || (addr + len - 1) < addr) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len = len;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len == wp->len
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
}

/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;

    return !(addr > wpend || wp->vaddr > addrend);
}

#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            CPUArchState *env = cpu->env_ptr;
            tb_flush(env);
        }
    }
}

void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
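/* Find the RAMBlock that contains @addr, checking the most-recently-used
 * block first before scanning the whole list.
 */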
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        goto found;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    rcu_read_lock();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    cpu_tlb_reset_dirty_all(start1, length);
    rcu_read_unlock();
}

/* Note: start and end must be within the same ram block.  */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
                                     unsigned client)
{
    if (length == 0)
        return;
    cpu_physical_memory_clear_dirty_range_type(start, length, client);

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }
}

static void cpu_physical_memory_set_dirty_tracking(bool enable)
{
    in_migration = enable;
}

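/* Compute the value stored in the TLB's iotlb entry for this mapping: a
 * ram_addr for plain RAM (tagged NOTDIRTY or ROM as needed), or a section
 * index plus offset for MMIO, with TLB_MMIO forced for watched pages.
 */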
/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        iotlb = section - section->address_space->dispatch->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
                               qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return map->sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
{
    memory_region_unref(mr);

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        object_unref(OBJECT(&subpage->iomem));
        g_free(subpage);
    }
}

static void phys_sections_free(PhysPageMap *map)
{
    while (map->sections_nb > 0) {
        MemoryRegionSection *section = &map->sections[--map->sections_nb];
        phys_section_destroy(section->mr);
    }
    g_free(map->sections);
    g_free(map->nodes);
}

static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                   d->map.nodes, d->map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(d->as, base);
        subsection.address_space = d->as;
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}


static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

    assert(num_pages);
    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
}

static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = as->next_dispatch;
    MemoryRegionSection now = *section, remain = *section;
    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);

    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                        - now.offset_within_address_space;

        now.size = int128_min(int128_make64(left), now.size);
        register_subpage(d, &now);
    } else {
        now.size = int128_zero();
    }
    while (int128_ne(remain.size, now.size)) {
        remain.size = int128_sub(remain.size, now.size);
        remain.offset_within_address_space += int128_get64(now.size);
        remain.offset_within_region += int128_get64(now.size);
        now = remain;
        if (int128_lt(remain.size, page_size)) {
            register_subpage(d, &now);
        } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
            now.size = page_size;
            register_subpage(d, &now);
        } else {
            now.size = int128_and(now.size, int128_neg(page_size));
            register_multipage(d, &now);
        }
    }
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#ifdef __linux__

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path, Error **errp)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        error_setg_errno(errp, errno, "failed to get page size of file %s",
                         path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC)
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);

    return fs.f_bsize;
}

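/* Create and mmap a (huge) page backing file for @block under @path,
 * honouring the RAM_SHARED flag; returns the mapped area or NULL on error.
 */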
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path,
                            Error **errp)
{
    char *filename;
    char *sanitized_name;
    char *c;
    void *area = NULL;
    int fd;
    uint64_t hpagesize;
    Error *local_err = NULL;

    hpagesize = gethugepagesize(path, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto error;
    }
    block->mr->align = hpagesize;

    if (memory < hpagesize) {
        error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
                   "or larger than huge page size 0x%" PRIx64,
                   memory, hpagesize);
        goto error;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        error_setg(errp,
                   "host lacks kvm mmu notifiers, -mem-path unsupported");
        goto error;
    }

    /* Make name safe to use with mkstemp by replacing '/' with '_'. */
    sanitized_name = g_strdup(memory_region_name(block->mr));
    for (c = sanitized_name; *c != '\0'; c++) {
        if (*c == '/')
            *c = '_';
    }

    filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
                               sanitized_name);
    g_free(sanitized_name);

    fd = mkstemp(filename);
    if (fd < 0) {
        error_setg_errno(errp, errno,
                         "unable to create backing store for hugepages");
        g_free(filename);
        goto error;
    }
    unlink(filename);
    g_free(filename);

    memory = (memory+hpagesize-1) & ~(hpagesize-1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory)) {
        perror("ftruncate");
    }

    area = mmap(0, memory, PROT_READ | PROT_WRITE,
                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
                fd, 0);
    if (area == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "unable to map backing store for hugepages");
        close(fd);
        goto error;
    }

    if (mem_prealloc) {
        os_mem_prealloc(fd, area, memory);
    }

    block->fd = fd;
    return area;

error:
    if (mem_prealloc) {
        error_report("%s", error_get_pretty(*errp));
        exit(1);
    }
    return NULL;
}
#endif

/* Called with the ramlist lock held.  */
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    assert(size != 0); /* it would hand out same offset multiple times */

    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
        return 0;
    }

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->max_length;

        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last = MAX(last, block->offset + block->max_length);
    }
    rcu_read_unlock();
    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;

    /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
    if (!machine_dump_guest_core(current_machine)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                    "but dump_guest_core=off specified\n");
        }
    }
}

/* Called within an RCU critical section, or while the ramlist lock
 * is held.
 */
static RAMBlock *find_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            return block;
        }
    }

    return NULL;
}

/* Called with iothread lock held.  */
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    rcu_read_lock();
    new_block = find_ram_block(addr);
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    rcu_read_unlock();
}

/* Called with iothread lock held.  */
void qemu_ram_unset_idstr(ram_addr_t addr)
{
    RAMBlock *block;

    /* FIXME: arch_init.c assumes that this is not called throughout
     * migration.  Ignore the problem since hot-unplug during migration
     * does not work anyway.
     */

    rcu_read_lock();
    block = find_ram_block(addr);
    if (block) {
        memset(block->idstr, 0, sizeof(block->idstr));
    }
    rcu_read_unlock();
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    if (!machine_mem_merge(current_machine)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

/* Only legal before guest might have detected the memory size: e.g. on
 * incoming migration, or right after reset.
 *
 * As the memory core doesn't know how the memory is accessed, it is up to
 * the resize callback to update device state and/or add assertions to detect
 * misuse, if necessary.
 */
int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
{
    RAMBlock *block = find_ram_block(base);

    assert(block);

    newsize = TARGET_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
        return 0;
    }

    if (!(block->flags & RAM_RESIZEABLE)) {
        error_setg_errno(errp, EINVAL,
                         "Length mismatch: %s: 0x" RAM_ADDR_FMT
                         " in != 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->used_length);
        return -EINVAL;
    }

    if (block->max_length < newsize) {
        error_setg_errno(errp, EINVAL,
                         "Length too large: %s: 0x" RAM_ADDR_FMT
                         " > 0x" RAM_ADDR_FMT, block->idstr,
                         newsize, block->max_length);
        return -EINVAL;
    }

    cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
    block->used_length = newsize;
    cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
    memory_region_set_size(block->mr, newsize);
    if (block->resized) {
        block->resized(block->idstr, newsize, block->host);
    }
    return 0;
}

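/* Insert @new_block into the RAM list: assign it an offset, allocate its host
 * memory if none was provided, keep the list sorted by size and mark the new
 * range dirty.  Returns the block's ram_addr_t offset, or -1 on error.
 */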
static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
{
    RAMBlock *block;
    RAMBlock *last_block = NULL;
    ram_addr_t old_ram_size, new_ram_size;

    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    qemu_mutex_lock_ramlist();
    new_block->offset = find_ram_offset(new_block->max_length);

    if (!new_block->host) {
        if (xen_enabled()) {
            xen_ram_alloc(new_block->offset, new_block->max_length,
                          new_block->mr);
        } else {
            new_block->host = phys_mem_alloc(new_block->max_length,
                                             &new_block->mr->align);
            if (!new_block->host) {
                error_setg_errno(errp, errno,
                                 "cannot set up guest memory '%s'",
                                 memory_region_name(new_block->mr));
                qemu_mutex_unlock_ramlist();
                return -1;
            }
            memory_try_enable_merging(new_block->host, new_block->max_length);
        }
    }

    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
     * QLIST (which has an RCU-friendly variant) does not have insertion at
     * tail, so save the last element in last_block.
     */
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        last_block = block;
        if (block->max_length < new_block->max_length) {
            break;
        }
    }
    if (block) {
        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
    } else if (last_block) {
        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
    } else { /* list is empty */
        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    /* Write list before version */
    smp_wmb();
    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;

    if (new_ram_size > old_ram_size) {
        int i;

        /* ram_list.dirty_memory[] is protected by the iothread lock.  */
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            ram_list.dirty_memory[i] =
                bitmap_zero_extend(ram_list.dirty_memory[i],
                                   old_ram_size, new_ram_size);
        }
    }
    cpu_physical_memory_set_dirty_range(new_block->offset,
                                        new_block->used_length);

    if (new_block->host) {
        qemu_ram_setup_dump(new_block->host, new_block->max_length);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
        qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
        if (kvm_enabled()) {
            kvm_setup_guest_memory(new_block->host, new_block->max_length);
        }
    }

    return new_block->offset;
}

e9a1ab19 1464
0b183fc8 1465#ifdef __linux__
e1c57ab8 1466ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
dbcb8981 1467 bool share, const char *mem_path,
7f56e740 1468 Error **errp)
e1c57ab8
PB
1469{
1470 RAMBlock *new_block;
ef701d7b
HT
1471 ram_addr_t addr;
1472 Error *local_err = NULL;
e1c57ab8
PB
1473
1474 if (xen_enabled()) {
7f56e740
PB
1475 error_setg(errp, "-mem-path not supported with Xen");
1476 return -1;
e1c57ab8
PB
1477 }
1478
1479 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1480 /*
1481 * file_ram_alloc() needs to allocate just like
1482 * phys_mem_alloc, but we haven't bothered to provide
1483 * a hook there.
1484 */
7f56e740
PB
1485 error_setg(errp,
1486 "-mem-path not supported with this accelerator");
1487 return -1;
e1c57ab8
PB
1488 }
1489
1490 size = TARGET_PAGE_ALIGN(size);
1491 new_block = g_malloc0(sizeof(*new_block));
1492 new_block->mr = mr;
9b8424d5
MT
1493 new_block->used_length = size;
1494 new_block->max_length = size;
dbcb8981 1495 new_block->flags = share ? RAM_SHARED : 0;
7f56e740
PB
1496 new_block->host = file_ram_alloc(new_block, size,
1497 mem_path, errp);
1498 if (!new_block->host) {
1499 g_free(new_block);
1500 return -1;
1501 }
1502
ef701d7b
HT
1503 addr = ram_block_add(new_block, &local_err);
1504 if (local_err) {
1505 g_free(new_block);
1506 error_propagate(errp, local_err);
1507 return -1;
1508 }
1509 return addr;
e1c57ab8 1510}
0b183fc8 1511#endif
e1c57ab8 1512
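/* Common back end for the qemu_ram_alloc*() variants: build a RAMBlock
 * (optionally resizeable or backed by caller-provided host memory) and
 * register it via ram_block_add().
 */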
static
ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
                                   void (*resized)(const char*,
                                                   uint64_t length,
                                                   void *host),
                                   void *host, bool resizeable,
                                   MemoryRegion *mr, Error **errp)
{
    RAMBlock *new_block;
    ram_addr_t addr;
    Error *local_err = NULL;

    size = TARGET_PAGE_ALIGN(size);
    max_size = TARGET_PAGE_ALIGN(max_size);
    new_block = g_malloc0(sizeof(*new_block));
    new_block->mr = mr;
    new_block->resized = resized;
    new_block->used_length = size;
    new_block->max_length = max_size;
    assert(max_size >= size);
    new_block->fd = -1;
    new_block->host = host;
    if (host) {
        new_block->flags |= RAM_PREALLOC;
    }
    if (resizeable) {
        new_block->flags |= RAM_RESIZEABLE;
    }
    addr = ram_block_add(new_block, &local_err);
    if (local_err) {
        g_free(new_block);
        error_propagate(errp, local_err);
        return -1;
    }
    return addr;
}

ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
}

ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
}

ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
                                     void (*resized)(const char*,
                                                     uint64_t length,
                                                     void *host),
                                     MemoryRegion *mr, Error **errp)
{
    return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
}

void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    qemu_mutex_lock_ramlist();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE_RCU(block, next);
            ram_list.mru_block = NULL;
            /* Write list before version */
            smp_wmb();
            ram_list.version++;
            g_free_rcu(block, rcu);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

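/* Release the host memory backing @block (mmap'ed file, anonymous RAM, or
 * Xen map-cache entry) unless it was pre-allocated by the caller, then free
 * the block itself.  Called as an RCU callback once no reader can still see
 * the block.
 */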
static void reclaim_ramblock(RAMBlock *block)
{
    if (block->flags & RAM_PREALLOC) {
        ;
    } else if (xen_enabled()) {
        xen_invalidate_map_cache_entry(block->host);
#ifndef _WIN32
    } else if (block->fd >= 0) {
        munmap(block->host, block->max_length);
        close(block->fd);
#endif
    } else {
        qemu_anon_ram_free(block->host, block->max_length);
    }
    g_free(block);
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    qemu_mutex_lock_ramlist();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE_RCU(block, next);
            ram_list.mru_block = NULL;
            /* Write list before version */
            smp_wmb();
            ram_list.version++;
            call_rcu(block, reclaim_ramblock, rcu);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->max_length) {
            vaddr = ramblock_ptr(block, offset);
            if (block->flags & RAM_PREALLOC) {
                ;
            } else if (xen_enabled()) {
                abort();
            } else {
                flags = MAP_FIXED;
                if (block->fd >= 0) {
                    flags |= (block->flags & RAM_SHARED ?
                              MAP_SHARED : MAP_PRIVATE);
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, block->fd, offset);
                } else {
                    /*
                     * Remap needs to match alloc.  Accelerators that
                     * set phys_mem_alloc never remap.  If they did,
                     * we'd need a remap hook here.
                     */
                    assert(phys_mem_alloc == qemu_anon_ram_alloc);

                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
        }
    }
}
#endif /* !_WIN32 */

int qemu_get_ram_fd(ram_addr_t addr)
{
    RAMBlock *block;
    int fd;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    fd = block->fd;
    rcu_read_unlock();
    return fd;
}

void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *ptr;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);
    ptr = ramblock_ptr(block, 0);
    rcu_read_unlock();
    return ptr;
}

/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * This should not be used for general purpose DMA.  Use address_space_map
 * or address_space_rw instead. For local memory (e.g. video ram) that the
 * device owns, use memory_region_get_ram_ptr.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;
    void *ptr;

    rcu_read_lock();
    block = qemu_get_ram_block(addr);

    if (xen_enabled() && block->host == NULL) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            ptr = xen_map_cache(addr, 0, 0);
            goto unlock;
        }

        block->host = xen_map_cache(block->offset, block->max_length, 1);
    }
    ptr = ramblock_ptr(block, addr - block->offset);

unlock:
    rcu_read_unlock();
    return ptr;
}

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument.
 *
 * By the time this function returns, the returned pointer is not protected
 * by RCU anymore.  If the caller is not within an RCU critical section and
 * does not hold the iothread lock, it must have other means of protecting the
 * pointer, such as a reference to the region that includes the incoming
 * ram_addr_t.
 */
static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
{
    void *ptr;
    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        RAMBlock *block;
        rcu_read_lock();
        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->max_length) {
                if (addr - block->offset + *size > block->max_length)
                    *size = block->max_length - addr + block->offset;
                ptr = ramblock_ptr(block, addr - block->offset);
                rcu_read_unlock();
                return ptr;
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}

7443b437 1771/* Some of the softmmu routines need to translate from a host pointer
ae3a7047
MD
1772 * (typically a TLB entry) back to a ram offset.
1773 *
1774 * By the time this function returns, the returned pointer is not protected
1775 * by RCU anymore. If the caller is not within an RCU critical section and
1776 * does not hold the iothread lock, it must have other means of protecting the
1777 * pointer, such as a reference to the region that includes the incoming
1778 * ram_addr_t.
1779 */
1b5ec234 1780MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
5579c7f3 1781{
94a6b54f
PB
1782 RAMBlock *block;
1783 uint8_t *host = ptr;
ae3a7047 1784 MemoryRegion *mr;
94a6b54f 1785
868bb33f 1786 if (xen_enabled()) {
0dc3f44a 1787 rcu_read_lock();
e41d7c69 1788 *ram_addr = xen_ram_addr_from_mapcache(ptr);
ae3a7047 1789 mr = qemu_get_ram_block(*ram_addr)->mr;
0dc3f44a 1790 rcu_read_unlock();
ae3a7047 1791 return mr;
712c2b41
SS
1792 }
1793
0dc3f44a
MD
1794 rcu_read_lock();
1795 block = atomic_rcu_read(&ram_list.mru_block);
9b8424d5 1796 if (block && block->host && host - block->host < block->max_length) {
23887b79
PB
1797 goto found;
1798 }
1799
0dc3f44a 1800 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
432d268c
JN
 1801 /* This case can happen when the block is not mapped. */
1802 if (block->host == NULL) {
1803 continue;
1804 }
9b8424d5 1805 if (host - block->host < block->max_length) {
23887b79 1806 goto found;
f471a17e 1807 }
94a6b54f 1808 }
432d268c 1809
0dc3f44a 1810 rcu_read_unlock();
1b5ec234 1811 return NULL;
23887b79
PB
1812
1813found:
1814 *ram_addr = block->offset + (host - block->host);
ae3a7047 1815 mr = block->mr;
0dc3f44a 1816 rcu_read_unlock();
ae3a7047 1817 return mr;
e890261f 1818}
f471a17e 1819
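/*
 * Illustrative sketch (not part of the original file): translating a host
 * pointer (for instance one obtained from a TLB entry or from
 * address_space_map) back into a ram_addr_t with the helper above.  A NULL
 * MemoryRegion means the pointer is not backed by any RAMBlock.
 */
static bool example_host_ptr_to_ram_addr(void *host, ram_addr_t *out)
{
    ram_addr_t ram_addr;
    MemoryRegion *mr = qemu_ram_addr_from_host(host, &ram_addr);

    if (mr == NULL) {
        return false;        /* not guest RAM */
    }
    *out = ram_addr;
    return true;
}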
a8170e5e 1820static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
0e0df1e2 1821 uint64_t val, unsigned size)
9fa3e853 1822{
52159192 1823 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
0e0df1e2 1824 tb_invalidate_phys_page_fast(ram_addr, size);
3a7d929e 1825 }
0e0df1e2
AK
1826 switch (size) {
1827 case 1:
1828 stb_p(qemu_get_ram_ptr(ram_addr), val);
1829 break;
1830 case 2:
1831 stw_p(qemu_get_ram_ptr(ram_addr), val);
1832 break;
1833 case 4:
1834 stl_p(qemu_get_ram_ptr(ram_addr), val);
1835 break;
1836 default:
1837 abort();
3a7d929e 1838 }
6886867e 1839 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
f23db169
FB
1840 /* we remove the notdirty callback only if the code has been
1841 flushed */
a2cd8c85 1842 if (!cpu_physical_memory_is_clean(ram_addr)) {
4917cf44 1843 CPUArchState *env = current_cpu->env_ptr;
93afeade 1844 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
4917cf44 1845 }
9fa3e853
FB
1846}
1847
b018ddf6
PB
1848static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1849 unsigned size, bool is_write)
1850{
1851 return is_write;
1852}
1853
0e0df1e2 1854static const MemoryRegionOps notdirty_mem_ops = {
0e0df1e2 1855 .write = notdirty_mem_write,
b018ddf6 1856 .valid.accepts = notdirty_mem_accepts,
0e0df1e2 1857 .endianness = DEVICE_NATIVE_ENDIAN,
1ccde1cb
FB
1858};
1859
0f459d16 1860/* Generate a debug exception if a watchpoint has been hit. */
66b9b43c 1861static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
0f459d16 1862{
93afeade
AF
1863 CPUState *cpu = current_cpu;
1864 CPUArchState *env = cpu->env_ptr;
06d55cc1 1865 target_ulong pc, cs_base;
0f459d16 1866 target_ulong vaddr;
a1d1bb31 1867 CPUWatchpoint *wp;
06d55cc1 1868 int cpu_flags;
0f459d16 1869
ff4700b0 1870 if (cpu->watchpoint_hit) {
06d55cc1
AL
1871 /* We re-entered the check after replacing the TB. Now raise
 1872 * the debug interrupt so that it will trigger after the
1873 * current instruction. */
93afeade 1874 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
06d55cc1
AL
1875 return;
1876 }
93afeade 1877 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
ff4700b0 1878 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d
PM
1879 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1880 && (wp->flags & flags)) {
08225676
PM
1881 if (flags == BP_MEM_READ) {
1882 wp->flags |= BP_WATCHPOINT_HIT_READ;
1883 } else {
1884 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1885 }
1886 wp->hitaddr = vaddr;
66b9b43c 1887 wp->hitattrs = attrs;
ff4700b0
AF
1888 if (!cpu->watchpoint_hit) {
1889 cpu->watchpoint_hit = wp;
239c51a5 1890 tb_check_watchpoint(cpu);
6e140f28 1891 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
27103424 1892 cpu->exception_index = EXCP_DEBUG;
5638d180 1893 cpu_loop_exit(cpu);
6e140f28
AL
1894 } else {
1895 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
648f034c 1896 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
0ea8cb88 1897 cpu_resume_from_signal(cpu, NULL);
6e140f28 1898 }
06d55cc1 1899 }
6e140f28
AL
1900 } else {
1901 wp->flags &= ~BP_WATCHPOINT_HIT;
0f459d16
PB
1902 }
1903 }
1904}
1905
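/*
 * Illustrative sketch (not part of the original file): how a debugger
 * front end arms the path above.  cpu_watchpoint_insert() -- assumed here
 * to have its usual (cpu, addr, len, flags, &wp) signature -- registers
 * the watchpoint; the TLB trick then routes guest accesses through
 * watch_mem_read/write, which call check_watchpoint().  BP_GDB marks the
 * watchpoint as owned by the debugger stub.
 */
static int example_watch_guest_writes(CPUState *cpu, vaddr addr, vaddr len)
{
    CPUWatchpoint *wp;

    return cpu_watchpoint_insert(cpu, addr, len,
                                 BP_MEM_WRITE | BP_GDB, &wp);
}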
6658ffb8
PB
1906/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1907 so these check for a hit then pass through to the normal out-of-line
1908 phys routines. */
66b9b43c
PM
1909static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
1910 unsigned size, MemTxAttrs attrs)
6658ffb8 1911{
66b9b43c
PM
1912 MemTxResult res;
1913 uint64_t data;
1914
1915 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
1ec9b909 1916 switch (size) {
66b9b43c
PM
1917 case 1:
1918 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
1919 break;
1920 case 2:
1921 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
1922 break;
1923 case 4:
1924 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
1925 break;
1ec9b909
AK
1926 default: abort();
1927 }
66b9b43c
PM
1928 *pdata = data;
1929 return res;
6658ffb8
PB
1930}
1931
66b9b43c
PM
1932static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
1933 uint64_t val, unsigned size,
1934 MemTxAttrs attrs)
6658ffb8 1935{
66b9b43c
PM
1936 MemTxResult res;
1937
1938 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
1ec9b909 1939 switch (size) {
67364150 1940 case 1:
66b9b43c 1941 address_space_stb(&address_space_memory, addr, val, attrs, &res);
67364150
MF
1942 break;
1943 case 2:
66b9b43c 1944 address_space_stw(&address_space_memory, addr, val, attrs, &res);
67364150
MF
1945 break;
1946 case 4:
66b9b43c 1947 address_space_stl(&address_space_memory, addr, val, attrs, &res);
67364150 1948 break;
1ec9b909
AK
1949 default: abort();
1950 }
66b9b43c 1951 return res;
6658ffb8
PB
1952}
1953
1ec9b909 1954static const MemoryRegionOps watch_mem_ops = {
66b9b43c
PM
1955 .read_with_attrs = watch_mem_read,
1956 .write_with_attrs = watch_mem_write,
1ec9b909 1957 .endianness = DEVICE_NATIVE_ENDIAN,
6658ffb8 1958};
6658ffb8 1959
f25a49e0
PM
1960static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
1961 unsigned len, MemTxAttrs attrs)
db7b5426 1962{
acc9d80b 1963 subpage_t *subpage = opaque;
ff6cff75 1964 uint8_t buf[8];
5c9eb028 1965 MemTxResult res;
791af8c8 1966
db7b5426 1967#if defined(DEBUG_SUBPAGE)
016e9d62 1968 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
acc9d80b 1969 subpage, len, addr);
db7b5426 1970#endif
5c9eb028
PM
1971 res = address_space_read(subpage->as, addr + subpage->base,
1972 attrs, buf, len);
1973 if (res) {
1974 return res;
f25a49e0 1975 }
acc9d80b
JK
1976 switch (len) {
1977 case 1:
f25a49e0
PM
1978 *data = ldub_p(buf);
1979 return MEMTX_OK;
acc9d80b 1980 case 2:
f25a49e0
PM
1981 *data = lduw_p(buf);
1982 return MEMTX_OK;
acc9d80b 1983 case 4:
f25a49e0
PM
1984 *data = ldl_p(buf);
1985 return MEMTX_OK;
ff6cff75 1986 case 8:
f25a49e0
PM
1987 *data = ldq_p(buf);
1988 return MEMTX_OK;
acc9d80b
JK
1989 default:
1990 abort();
1991 }
db7b5426
BS
1992}
1993
f25a49e0
PM
1994static MemTxResult subpage_write(void *opaque, hwaddr addr,
1995 uint64_t value, unsigned len, MemTxAttrs attrs)
db7b5426 1996{
acc9d80b 1997 subpage_t *subpage = opaque;
ff6cff75 1998 uint8_t buf[8];
acc9d80b 1999
db7b5426 2000#if defined(DEBUG_SUBPAGE)
016e9d62 2001 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
acc9d80b
JK
2002 " value %"PRIx64"\n",
2003 __func__, subpage, len, addr, value);
db7b5426 2004#endif
acc9d80b
JK
2005 switch (len) {
2006 case 1:
2007 stb_p(buf, value);
2008 break;
2009 case 2:
2010 stw_p(buf, value);
2011 break;
2012 case 4:
2013 stl_p(buf, value);
2014 break;
ff6cff75
PB
2015 case 8:
2016 stq_p(buf, value);
2017 break;
acc9d80b
JK
2018 default:
2019 abort();
2020 }
5c9eb028
PM
2021 return address_space_write(subpage->as, addr + subpage->base,
2022 attrs, buf, len);
db7b5426
BS
2023}
2024
c353e4cc 2025static bool subpage_accepts(void *opaque, hwaddr addr,
016e9d62 2026 unsigned len, bool is_write)
c353e4cc 2027{
acc9d80b 2028 subpage_t *subpage = opaque;
c353e4cc 2029#if defined(DEBUG_SUBPAGE)
016e9d62 2030 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
acc9d80b 2031 __func__, subpage, is_write ? 'w' : 'r', len, addr);
c353e4cc
PB
2032#endif
2033
acc9d80b 2034 return address_space_access_valid(subpage->as, addr + subpage->base,
016e9d62 2035 len, is_write);
c353e4cc
PB
2036}
2037
70c68e44 2038static const MemoryRegionOps subpage_ops = {
f25a49e0
PM
2039 .read_with_attrs = subpage_read,
2040 .write_with_attrs = subpage_write,
ff6cff75
PB
2041 .impl.min_access_size = 1,
2042 .impl.max_access_size = 8,
2043 .valid.min_access_size = 1,
2044 .valid.max_access_size = 8,
c353e4cc 2045 .valid.accepts = subpage_accepts,
70c68e44 2046 .endianness = DEVICE_NATIVE_ENDIAN,
db7b5426
BS
2047};
2048
c227f099 2049static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
5312bd8b 2050 uint16_t section)
db7b5426
BS
2051{
2052 int idx, eidx;
2053
2054 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2055 return -1;
2056 idx = SUBPAGE_IDX(start);
2057 eidx = SUBPAGE_IDX(end);
2058#if defined(DEBUG_SUBPAGE)
016e9d62
AK
2059 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2060 __func__, mmio, start, end, idx, eidx, section);
db7b5426 2061#endif
db7b5426 2062 for (; idx <= eidx; idx++) {
5312bd8b 2063 mmio->sub_section[idx] = section;
db7b5426
BS
2064 }
2065
2066 return 0;
2067}
2068
acc9d80b 2069static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
db7b5426 2070{
c227f099 2071 subpage_t *mmio;
db7b5426 2072
7267c094 2073 mmio = g_malloc0(sizeof(subpage_t));
1eec614b 2074
acc9d80b 2075 mmio->as = as;
1eec614b 2076 mmio->base = base;
2c9b15ca 2077 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
b4fefef9 2078 NULL, TARGET_PAGE_SIZE);
b3b00c78 2079 mmio->iomem.subpage = true;
db7b5426 2080#if defined(DEBUG_SUBPAGE)
016e9d62
AK
2081 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2082 mmio, base, TARGET_PAGE_SIZE);
db7b5426 2083#endif
b41aac4f 2084 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
db7b5426
BS
2085
2086 return mmio;
2087}
2088
a656e22f
PC
2089static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2090 MemoryRegion *mr)
5312bd8b 2091{
a656e22f 2092 assert(as);
5312bd8b 2093 MemoryRegionSection section = {
a656e22f 2094 .address_space = as,
5312bd8b
AK
2095 .mr = mr,
2096 .offset_within_address_space = 0,
2097 .offset_within_region = 0,
052e87b0 2098 .size = int128_2_64(),
5312bd8b
AK
2099 };
2100
53cb28cb 2101 return phys_section_add(map, &section);
5312bd8b
AK
2102}
2103
9d82b5a7 2104MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
aa102231 2105{
79e2b9ae
PB
2106 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2107 MemoryRegionSection *sections = d->map.sections;
9d82b5a7
PB
2108
2109 return sections[index & ~TARGET_PAGE_MASK].mr;
aa102231
AK
2110}
2111
e9179ce1
AK
2112static void io_mem_init(void)
2113{
1f6245e5 2114 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2c9b15ca 2115 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1f6245e5 2116 NULL, UINT64_MAX);
2c9b15ca 2117 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1f6245e5 2118 NULL, UINT64_MAX);
2c9b15ca 2119 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1f6245e5 2120 NULL, UINT64_MAX);
e9179ce1
AK
2121}
2122
ac1970fb 2123static void mem_begin(MemoryListener *listener)
00752703
PB
2124{
2125 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
53cb28cb
MA
2126 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2127 uint16_t n;
2128
a656e22f 2129 n = dummy_section(&d->map, as, &io_mem_unassigned);
53cb28cb 2130 assert(n == PHYS_SECTION_UNASSIGNED);
a656e22f 2131 n = dummy_section(&d->map, as, &io_mem_notdirty);
53cb28cb 2132 assert(n == PHYS_SECTION_NOTDIRTY);
a656e22f 2133 n = dummy_section(&d->map, as, &io_mem_rom);
53cb28cb 2134 assert(n == PHYS_SECTION_ROM);
a656e22f 2135 n = dummy_section(&d->map, as, &io_mem_watch);
53cb28cb 2136 assert(n == PHYS_SECTION_WATCH);
00752703 2137
9736e55b 2138 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
00752703
PB
2139 d->as = as;
2140 as->next_dispatch = d;
2141}
2142
79e2b9ae
PB
2143static void address_space_dispatch_free(AddressSpaceDispatch *d)
2144{
2145 phys_sections_free(&d->map);
2146 g_free(d);
2147}
2148
00752703 2149static void mem_commit(MemoryListener *listener)
ac1970fb 2150{
89ae337a 2151 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
0475d94f
PB
2152 AddressSpaceDispatch *cur = as->dispatch;
2153 AddressSpaceDispatch *next = as->next_dispatch;
2154
53cb28cb 2155 phys_page_compact_all(next, next->map.nodes_nb);
b35ba30f 2156
79e2b9ae 2157 atomic_rcu_set(&as->dispatch, next);
53cb28cb 2158 if (cur) {
79e2b9ae 2159 call_rcu(cur, address_space_dispatch_free, rcu);
53cb28cb 2160 }
9affd6fc
PB
2161}
2162
1d71148e 2163static void tcg_commit(MemoryListener *listener)
50c1e149 2164{
182735ef 2165 CPUState *cpu;
117712c3
AK
2166
2167 /* since each CPU stores ram addresses in its TLB cache, we must
2168 reset the modified entries */
2169 /* XXX: slow ! */
bdc44640 2170 CPU_FOREACH(cpu) {
33bde2e1
EI
2171 /* FIXME: Disentangle the cpu.h circular files deps so we can
2172 directly get the right CPU from listener. */
2173 if (cpu->tcg_as_listener != listener) {
2174 continue;
2175 }
76e5c76f 2176 cpu_reload_memory_map(cpu);
117712c3 2177 }
50c1e149
AK
2178}
2179
93632747
AK
2180static void core_log_global_start(MemoryListener *listener)
2181{
981fdf23 2182 cpu_physical_memory_set_dirty_tracking(true);
93632747
AK
2183}
2184
2185static void core_log_global_stop(MemoryListener *listener)
2186{
981fdf23 2187 cpu_physical_memory_set_dirty_tracking(false);
93632747
AK
2188}
2189
93632747 2190static MemoryListener core_memory_listener = {
93632747
AK
2191 .log_global_start = core_log_global_start,
2192 .log_global_stop = core_log_global_stop,
ac1970fb 2193 .priority = 1,
93632747
AK
2194};
2195
ac1970fb
AK
2196void address_space_init_dispatch(AddressSpace *as)
2197{
00752703 2198 as->dispatch = NULL;
89ae337a 2199 as->dispatch_listener = (MemoryListener) {
ac1970fb 2200 .begin = mem_begin,
00752703 2201 .commit = mem_commit,
ac1970fb
AK
2202 .region_add = mem_add,
2203 .region_nop = mem_add,
2204 .priority = 0,
2205 };
89ae337a 2206 memory_listener_register(&as->dispatch_listener, as);
ac1970fb
AK
2207}
2208
6e48e8f9
PB
2209void address_space_unregister(AddressSpace *as)
2210{
2211 memory_listener_unregister(&as->dispatch_listener);
2212}
2213
83f3c251
AK
2214void address_space_destroy_dispatch(AddressSpace *as)
2215{
2216 AddressSpaceDispatch *d = as->dispatch;
2217
79e2b9ae
PB
2218 atomic_rcu_set(&as->dispatch, NULL);
2219 if (d) {
2220 call_rcu(d, address_space_dispatch_free, rcu);
2221 }
83f3c251
AK
2222}
2223
62152b8a
AK
2224static void memory_map_init(void)
2225{
7267c094 2226 system_memory = g_malloc(sizeof(*system_memory));
03f49957 2227
57271d63 2228 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
7dca8043 2229 address_space_init(&address_space_memory, system_memory, "memory");
309cb471 2230
7267c094 2231 system_io = g_malloc(sizeof(*system_io));
3bb28b72
JK
2232 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2233 65536);
7dca8043 2234 address_space_init(&address_space_io, system_io, "I/O");
93632747 2235
f6790af6 2236 memory_listener_register(&core_memory_listener, &address_space_memory);
62152b8a
AK
2237}
2238
2239MemoryRegion *get_system_memory(void)
2240{
2241 return system_memory;
2242}
2243
309cb471
AK
2244MemoryRegion *get_system_io(void)
2245{
2246 return system_io;
2247}
2248
e2eef170
PB
2249#endif /* !defined(CONFIG_USER_ONLY) */
2250
13eb76e0
FB
2251/* physical memory access (slow version, mainly for debug) */
2252#if defined(CONFIG_USER_ONLY)
f17ec444 2253int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
a68fe89c 2254 uint8_t *buf, int len, int is_write)
13eb76e0
FB
2255{
2256 int l, flags;
2257 target_ulong page;
53a5960a 2258 void * p;
13eb76e0
FB
2259
2260 while (len > 0) {
2261 page = addr & TARGET_PAGE_MASK;
2262 l = (page + TARGET_PAGE_SIZE) - addr;
2263 if (l > len)
2264 l = len;
2265 flags = page_get_flags(page);
2266 if (!(flags & PAGE_VALID))
a68fe89c 2267 return -1;
13eb76e0
FB
2268 if (is_write) {
2269 if (!(flags & PAGE_WRITE))
a68fe89c 2270 return -1;
579a97f7 2271 /* XXX: this code should not depend on lock_user */
72fb7daa 2272 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
a68fe89c 2273 return -1;
72fb7daa
AJ
2274 memcpy(p, buf, l);
2275 unlock_user(p, addr, l);
13eb76e0
FB
2276 } else {
2277 if (!(flags & PAGE_READ))
a68fe89c 2278 return -1;
579a97f7 2279 /* XXX: this code should not depend on lock_user */
72fb7daa 2280 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
a68fe89c 2281 return -1;
72fb7daa 2282 memcpy(buf, p, l);
5b257578 2283 unlock_user(p, addr, 0);
13eb76e0
FB
2284 }
2285 len -= l;
2286 buf += l;
2287 addr += l;
2288 }
a68fe89c 2289 return 0;
13eb76e0 2290}
8df1cd07 2291
13eb76e0 2292#else
51d7a9eb 2293
a8170e5e
AK
2294static void invalidate_and_set_dirty(hwaddr addr,
2295 hwaddr length)
51d7a9eb 2296{
f874bf90
PM
2297 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2298 tb_invalidate_phys_range(addr, addr + length, 0);
6886867e 2299 cpu_physical_memory_set_dirty_range_nocode(addr, length);
51d7a9eb 2300 }
e226939d 2301 xen_modified_memory(addr, length);
51d7a9eb
AP
2302}
2303
23326164 2304static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
82f2563f 2305{
e1622f4b 2306 unsigned access_size_max = mr->ops->valid.max_access_size;
23326164
RH
2307
2308 /* Regions are assumed to support 1-4 byte accesses unless
2309 otherwise specified. */
23326164
RH
2310 if (access_size_max == 0) {
2311 access_size_max = 4;
2312 }
2313
2314 /* Bound the maximum access by the alignment of the address. */
2315 if (!mr->ops->impl.unaligned) {
2316 unsigned align_size_max = addr & -addr;
2317 if (align_size_max != 0 && align_size_max < access_size_max) {
2318 access_size_max = align_size_max;
2319 }
82f2563f 2320 }
23326164
RH
2321
2322 /* Don't attempt accesses larger than the maximum. */
2323 if (l > access_size_max) {
2324 l = access_size_max;
82f2563f 2325 }
098178f2
PB
2326 if (l & (l - 1)) {
2327 l = 1 << (qemu_fls(l) - 1);
2328 }
23326164
RH
2329
2330 return l;
82f2563f
PB
2331}
2332
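/*
 * Illustrative sketch (not part of the original file): the alignment clamp
 * used by memory_access_size().  "addr & -addr" isolates the lowest set
 * bit of the address, i.e. its natural alignment; for a made-up address of
 * 0x1002 that is 2, so an 8-byte access is first issued as a 2-byte one.
 */
static inline unsigned example_align_clamp(hwaddr addr, unsigned l)
{
    uint64_t align = addr & -addr;   /* lowest set bit = natural alignment */

    if (align != 0 && align < l) {
        return (unsigned)align;
    }
    return l;
}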
5c9eb028
PM
2333MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2334 uint8_t *buf, int len, bool is_write)
13eb76e0 2335{
149f54b5 2336 hwaddr l;
13eb76e0 2337 uint8_t *ptr;
791af8c8 2338 uint64_t val;
149f54b5 2339 hwaddr addr1;
5c8a00ce 2340 MemoryRegion *mr;
3b643495 2341 MemTxResult result = MEMTX_OK;
3b46e624 2342
13eb76e0 2343 while (len > 0) {
149f54b5 2344 l = len;
5c8a00ce 2345 mr = address_space_translate(as, addr, &addr1, &l, is_write);
3b46e624 2346
13eb76e0 2347 if (is_write) {
5c8a00ce
PB
2348 if (!memory_access_is_direct(mr, is_write)) {
2349 l = memory_access_size(mr, l, addr1);
4917cf44 2350 /* XXX: could force current_cpu to NULL to avoid
6a00d601 2351 potential bugs */
23326164
RH
2352 switch (l) {
2353 case 8:
2354 /* 64 bit write access */
2355 val = ldq_p(buf);
3b643495
PM
2356 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2357 attrs);
23326164
RH
2358 break;
2359 case 4:
1c213d19 2360 /* 32 bit write access */
c27004ec 2361 val = ldl_p(buf);
3b643495
PM
2362 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2363 attrs);
23326164
RH
2364 break;
2365 case 2:
1c213d19 2366 /* 16 bit write access */
c27004ec 2367 val = lduw_p(buf);
3b643495
PM
2368 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2369 attrs);
23326164
RH
2370 break;
2371 case 1:
1c213d19 2372 /* 8 bit write access */
c27004ec 2373 val = ldub_p(buf);
3b643495
PM
2374 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2375 attrs);
23326164
RH
2376 break;
2377 default:
2378 abort();
13eb76e0 2379 }
2bbfa05d 2380 } else {
5c8a00ce 2381 addr1 += memory_region_get_ram_addr(mr);
13eb76e0 2382 /* RAM case */
5579c7f3 2383 ptr = qemu_get_ram_ptr(addr1);
13eb76e0 2384 memcpy(ptr, buf, l);
51d7a9eb 2385 invalidate_and_set_dirty(addr1, l);
13eb76e0
FB
2386 }
2387 } else {
5c8a00ce 2388 if (!memory_access_is_direct(mr, is_write)) {
13eb76e0 2389 /* I/O case */
5c8a00ce 2390 l = memory_access_size(mr, l, addr1);
23326164
RH
2391 switch (l) {
2392 case 8:
2393 /* 64 bit read access */
3b643495
PM
2394 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2395 attrs);
23326164
RH
2396 stq_p(buf, val);
2397 break;
2398 case 4:
13eb76e0 2399 /* 32 bit read access */
3b643495
PM
2400 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2401 attrs);
c27004ec 2402 stl_p(buf, val);
23326164
RH
2403 break;
2404 case 2:
13eb76e0 2405 /* 16 bit read access */
3b643495
PM
2406 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2407 attrs);
c27004ec 2408 stw_p(buf, val);
23326164
RH
2409 break;
2410 case 1:
1c213d19 2411 /* 8 bit read access */
3b643495
PM
2412 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2413 attrs);
c27004ec 2414 stb_p(buf, val);
23326164
RH
2415 break;
2416 default:
2417 abort();
13eb76e0
FB
2418 }
2419 } else {
2420 /* RAM case */
5c8a00ce 2421 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
f3705d53 2422 memcpy(buf, ptr, l);
13eb76e0
FB
2423 }
2424 }
2425 len -= l;
2426 buf += l;
2427 addr += l;
2428 }
fd8aaa76 2429
3b643495 2430 return result;
13eb76e0 2431}
8df1cd07 2432
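/*
 * Illustrative sketch (not part of the original file): a device-model
 * style caller of the slow path above.  The buffer, address and length are
 * assumed inputs; the point is the MemTxResult check, which surfaces
 * decode or device errors instead of silently ignoring them.
 */
static bool example_dma_write(AddressSpace *as, hwaddr addr,
                              const uint8_t *buf, int len)
{
    MemTxResult res = address_space_write(as, addr, MEMTXATTRS_UNSPECIFIED,
                                          buf, len);

    return res == MEMTX_OK;
}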
5c9eb028
PM
2433MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2434 const uint8_t *buf, int len)
ac1970fb 2435{
5c9eb028 2436 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
ac1970fb
AK
2437}
2438
5c9eb028
PM
2439MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2440 uint8_t *buf, int len)
ac1970fb 2441{
5c9eb028 2442 return address_space_rw(as, addr, attrs, buf, len, false);
ac1970fb
AK
2443}
2444
2445
a8170e5e 2446void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
ac1970fb
AK
2447 int len, int is_write)
2448{
5c9eb028
PM
2449 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2450 buf, len, is_write);
ac1970fb
AK
2451}
2452
582b55a9
AG
2453enum write_rom_type {
2454 WRITE_DATA,
2455 FLUSH_CACHE,
2456};
2457
2a221651 2458static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
582b55a9 2459 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
d0ecd2aa 2460{
149f54b5 2461 hwaddr l;
d0ecd2aa 2462 uint8_t *ptr;
149f54b5 2463 hwaddr addr1;
5c8a00ce 2464 MemoryRegion *mr;
3b46e624 2465
d0ecd2aa 2466 while (len > 0) {
149f54b5 2467 l = len;
2a221651 2468 mr = address_space_translate(as, addr, &addr1, &l, true);
3b46e624 2469
5c8a00ce
PB
2470 if (!(memory_region_is_ram(mr) ||
2471 memory_region_is_romd(mr))) {
d0ecd2aa
FB
2472 /* do nothing */
2473 } else {
5c8a00ce 2474 addr1 += memory_region_get_ram_addr(mr);
d0ecd2aa 2475 /* ROM/RAM case */
5579c7f3 2476 ptr = qemu_get_ram_ptr(addr1);
582b55a9
AG
2477 switch (type) {
2478 case WRITE_DATA:
2479 memcpy(ptr, buf, l);
2480 invalidate_and_set_dirty(addr1, l);
2481 break;
2482 case FLUSH_CACHE:
2483 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2484 break;
2485 }
d0ecd2aa
FB
2486 }
2487 len -= l;
2488 buf += l;
2489 addr += l;
2490 }
2491}
2492
582b55a9 2493/* used for ROM loading : can write in RAM and ROM */
2a221651 2494void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
582b55a9
AG
2495 const uint8_t *buf, int len)
2496{
2a221651 2497 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
582b55a9
AG
2498}
2499
2500void cpu_flush_icache_range(hwaddr start, int len)
2501{
2502 /*
2503 * This function should do the same thing as an icache flush that was
2504 * triggered from within the guest. For TCG we are always cache coherent,
2505 * so there is no need to flush anything. For KVM / Xen we need to flush
2506 * the host's instruction cache at least.
2507 */
2508 if (tcg_enabled()) {
2509 return;
2510 }
2511
2a221651
EI
2512 cpu_physical_memory_write_rom_internal(&address_space_memory,
2513 start, NULL, len, FLUSH_CACHE);
582b55a9
AG
2514}
2515
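/*
 * Illustrative sketch (not part of the original file): loading a firmware
 * blob into guest memory and making the host instruction cache coherent
 * before the guest executes it (only needed for KVM/Xen, as explained
 * above).  "blob" and "size" are assumed inputs.
 */
static void example_load_firmware(hwaddr load_addr,
                                  const uint8_t *blob, int size)
{
    cpu_physical_memory_write_rom(&address_space_memory, load_addr,
                                  blob, size);
    cpu_flush_icache_range(load_addr, size);
}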
6d16c2f8 2516typedef struct {
d3e71559 2517 MemoryRegion *mr;
6d16c2f8 2518 void *buffer;
a8170e5e
AK
2519 hwaddr addr;
2520 hwaddr len;
6d16c2f8
AL
2521} BounceBuffer;
2522
2523static BounceBuffer bounce;
2524
ba223c29
AL
2525typedef struct MapClient {
2526 void *opaque;
2527 void (*callback)(void *opaque);
72cf2d4f 2528 QLIST_ENTRY(MapClient) link;
ba223c29
AL
2529} MapClient;
2530
72cf2d4f
BS
2531static QLIST_HEAD(map_client_list, MapClient) map_client_list
2532 = QLIST_HEAD_INITIALIZER(map_client_list);
ba223c29
AL
2533
2534void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2535{
7267c094 2536 MapClient *client = g_malloc(sizeof(*client));
ba223c29
AL
2537
2538 client->opaque = opaque;
2539 client->callback = callback;
72cf2d4f 2540 QLIST_INSERT_HEAD(&map_client_list, client, link);
ba223c29
AL
2541 return client;
2542}
2543
8b9c99d9 2544static void cpu_unregister_map_client(void *_client)
ba223c29
AL
2545{
2546 MapClient *client = (MapClient *)_client;
2547
72cf2d4f 2548 QLIST_REMOVE(client, link);
7267c094 2549 g_free(client);
ba223c29
AL
2550}
2551
2552static void cpu_notify_map_clients(void)
2553{
2554 MapClient *client;
2555
72cf2d4f
BS
2556 while (!QLIST_EMPTY(&map_client_list)) {
2557 client = QLIST_FIRST(&map_client_list);
ba223c29 2558 client->callback(client->opaque);
34d5e948 2559 cpu_unregister_map_client(client);
ba223c29
AL
2560 }
2561}
2562
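/*
 * Illustrative sketch (not part of the original file): how a DMA user
 * waits for the single bounce buffer to become free again.  The callback
 * runs from cpu_notify_map_clients() when address_space_unmap() releases
 * the buffer; "retry_dma" and "dev" are hypothetical names.
 */
static void retry_dma(void *opaque)
{
    /* re-issue the address_space_map() call that previously returned NULL */
}

static void example_wait_for_bounce_buffer(void *dev)
{
    cpu_register_map_client(dev, retry_dma);
}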
51644ab7
PB
2563bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2564{
5c8a00ce 2565 MemoryRegion *mr;
51644ab7
PB
2566 hwaddr l, xlat;
2567
2568 while (len > 0) {
2569 l = len;
5c8a00ce
PB
2570 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2571 if (!memory_access_is_direct(mr, is_write)) {
2572 l = memory_access_size(mr, l, addr);
2573 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
51644ab7
PB
2574 return false;
2575 }
2576 }
2577
2578 len -= l;
2579 addr += l;
2580 }
2581 return true;
2582}
2583
6d16c2f8
AL
2584/* Map a physical memory region into a host virtual address.
2585 * May map a subset of the requested range, given by and returned in *plen.
2586 * May return NULL if resources needed to perform the mapping are exhausted.
2587 * Use only for reads OR writes - not for read-modify-write operations.
ba223c29
AL
2588 * Use cpu_register_map_client() to know when retrying the map operation is
2589 * likely to succeed.
6d16c2f8 2590 */
ac1970fb 2591void *address_space_map(AddressSpace *as,
a8170e5e
AK
2592 hwaddr addr,
2593 hwaddr *plen,
ac1970fb 2594 bool is_write)
6d16c2f8 2595{
a8170e5e 2596 hwaddr len = *plen;
e3127ae0
PB
2597 hwaddr done = 0;
2598 hwaddr l, xlat, base;
2599 MemoryRegion *mr, *this_mr;
2600 ram_addr_t raddr;
6d16c2f8 2601
e3127ae0
PB
2602 if (len == 0) {
2603 return NULL;
2604 }
38bee5dc 2605
e3127ae0
PB
2606 l = len;
2607 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2608 if (!memory_access_is_direct(mr, is_write)) {
2609 if (bounce.buffer) {
2610 return NULL;
6d16c2f8 2611 }
e85d9db5
KW
2612 /* Avoid unbounded allocations */
2613 l = MIN(l, TARGET_PAGE_SIZE);
2614 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
e3127ae0
PB
2615 bounce.addr = addr;
2616 bounce.len = l;
d3e71559
PB
2617
2618 memory_region_ref(mr);
2619 bounce.mr = mr;
e3127ae0 2620 if (!is_write) {
5c9eb028
PM
2621 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2622 bounce.buffer, l);
8ab934f9 2623 }
6d16c2f8 2624
e3127ae0
PB
2625 *plen = l;
2626 return bounce.buffer;
2627 }
2628
2629 base = xlat;
2630 raddr = memory_region_get_ram_addr(mr);
2631
2632 for (;;) {
6d16c2f8
AL
2633 len -= l;
2634 addr += l;
e3127ae0
PB
2635 done += l;
2636 if (len == 0) {
2637 break;
2638 }
2639
2640 l = len;
2641 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2642 if (this_mr != mr || xlat != base + done) {
2643 break;
2644 }
6d16c2f8 2645 }
e3127ae0 2646
d3e71559 2647 memory_region_ref(mr);
e3127ae0
PB
2648 *plen = done;
2649 return qemu_ram_ptr_length(raddr + base, plen);
6d16c2f8
AL
2650}
2651
ac1970fb 2652/* Unmaps a memory region previously mapped by address_space_map().
6d16c2f8
AL
2653 * Will also mark the memory as dirty if is_write == 1. access_len gives
2654 * the amount of memory that was actually read or written by the caller.
2655 */
a8170e5e
AK
2656void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2657 int is_write, hwaddr access_len)
6d16c2f8
AL
2658{
2659 if (buffer != bounce.buffer) {
d3e71559
PB
2660 MemoryRegion *mr;
2661 ram_addr_t addr1;
2662
2663 mr = qemu_ram_addr_from_host(buffer, &addr1);
2664 assert(mr != NULL);
6d16c2f8 2665 if (is_write) {
6886867e 2666 invalidate_and_set_dirty(addr1, access_len);
6d16c2f8 2667 }
868bb33f 2668 if (xen_enabled()) {
e41d7c69 2669 xen_invalidate_map_cache_entry(buffer);
050a0ddf 2670 }
d3e71559 2671 memory_region_unref(mr);
6d16c2f8
AL
2672 return;
2673 }
2674 if (is_write) {
5c9eb028
PM
2675 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2676 bounce.buffer, access_len);
6d16c2f8 2677 }
f8a83245 2678 qemu_vfree(bounce.buffer);
6d16c2f8 2679 bounce.buffer = NULL;
d3e71559 2680 memory_region_unref(bounce.mr);
ba223c29 2681 cpu_notify_map_clients();
6d16c2f8 2682}
d0ecd2aa 2683
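/*
 * Illustrative sketch (not part of the original file): the canonical
 * map/use/unmap pattern for zero-copy DMA.  If the region is not direct
 * RAM the call may hand back the single bounce buffer, and *plen may come
 * back shorter than requested, so callers must loop or fall back to
 * address_space_rw().  "buf" and "len" are assumed inputs.
 */
static bool example_dma_read_mapped(AddressSpace *as, hwaddr addr,
                                    uint8_t *buf, hwaddr len)
{
    hwaddr plen = len;
    void *mem = address_space_map(as, addr, &plen, false);

    if (!mem || plen < len) {
        if (mem) {
            address_space_unmap(as, mem, plen, false, 0);
        }
        return false;        /* caller should retry or use the slow path */
    }
    memcpy(buf, mem, len);
    address_space_unmap(as, mem, plen, false, len);
    return true;
}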
a8170e5e
AK
2684void *cpu_physical_memory_map(hwaddr addr,
2685 hwaddr *plen,
ac1970fb
AK
2686 int is_write)
2687{
2688 return address_space_map(&address_space_memory, addr, plen, is_write);
2689}
2690
a8170e5e
AK
2691void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2692 int is_write, hwaddr access_len)
ac1970fb
AK
2693{
2694 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2695}
2696
8df1cd07 2697/* warning: addr must be aligned */
50013115
PM
2698static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2699 MemTxAttrs attrs,
2700 MemTxResult *result,
2701 enum device_endian endian)
8df1cd07 2702{
8df1cd07 2703 uint8_t *ptr;
791af8c8 2704 uint64_t val;
5c8a00ce 2705 MemoryRegion *mr;
149f54b5
PB
2706 hwaddr l = 4;
2707 hwaddr addr1;
50013115 2708 MemTxResult r;
8df1cd07 2709
fdfba1a2 2710 mr = address_space_translate(as, addr, &addr1, &l, false);
5c8a00ce 2711 if (l < 4 || !memory_access_is_direct(mr, false)) {
8df1cd07 2712 /* I/O case */
50013115 2713 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
1e78bcc1
AG
2714#if defined(TARGET_WORDS_BIGENDIAN)
2715 if (endian == DEVICE_LITTLE_ENDIAN) {
2716 val = bswap32(val);
2717 }
2718#else
2719 if (endian == DEVICE_BIG_ENDIAN) {
2720 val = bswap32(val);
2721 }
2722#endif
8df1cd07
FB
2723 } else {
2724 /* RAM case */
5c8a00ce 2725 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2726 & TARGET_PAGE_MASK)
149f54b5 2727 + addr1);
1e78bcc1
AG
2728 switch (endian) {
2729 case DEVICE_LITTLE_ENDIAN:
2730 val = ldl_le_p(ptr);
2731 break;
2732 case DEVICE_BIG_ENDIAN:
2733 val = ldl_be_p(ptr);
2734 break;
2735 default:
2736 val = ldl_p(ptr);
2737 break;
2738 }
50013115
PM
2739 r = MEMTX_OK;
2740 }
2741 if (result) {
2742 *result = r;
8df1cd07
FB
2743 }
2744 return val;
2745}
2746
50013115
PM
2747uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2748 MemTxAttrs attrs, MemTxResult *result)
2749{
2750 return address_space_ldl_internal(as, addr, attrs, result,
2751 DEVICE_NATIVE_ENDIAN);
2752}
2753
2754uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2755 MemTxAttrs attrs, MemTxResult *result)
2756{
2757 return address_space_ldl_internal(as, addr, attrs, result,
2758 DEVICE_LITTLE_ENDIAN);
2759}
2760
2761uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2762 MemTxAttrs attrs, MemTxResult *result)
2763{
2764 return address_space_ldl_internal(as, addr, attrs, result,
2765 DEVICE_BIG_ENDIAN);
2766}
2767
fdfba1a2 2768uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2769{
50013115 2770 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
2771}
2772
fdfba1a2 2773uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2774{
50013115 2775 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
2776}
2777
fdfba1a2 2778uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2779{
50013115 2780 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
2781}
2782
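/*
 * Illustrative sketch (not part of the original file): picking the right
 * variant of the helpers above.  A device register documented as
 * little-endian is read with the _le_ helper so the value is correct
 * regardless of host and target byte order; plain ldl_phys() applies the
 * target's native byte order.  "reg_addr" is an assumed input.
 */
static uint32_t example_read_le_register(AddressSpace *as, hwaddr reg_addr)
{
    return ldl_le_phys(as, reg_addr);
}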
84b7b8e7 2783/* warning: addr must be aligned */
50013115
PM
2784static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2785 MemTxAttrs attrs,
2786 MemTxResult *result,
2787 enum device_endian endian)
84b7b8e7 2788{
84b7b8e7
FB
2789 uint8_t *ptr;
2790 uint64_t val;
5c8a00ce 2791 MemoryRegion *mr;
149f54b5
PB
2792 hwaddr l = 8;
2793 hwaddr addr1;
50013115 2794 MemTxResult r;
84b7b8e7 2795
2c17449b 2796 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2797 false);
2798 if (l < 8 || !memory_access_is_direct(mr, false)) {
84b7b8e7 2799 /* I/O case */
50013115 2800 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
968a5627
PB
2801#if defined(TARGET_WORDS_BIGENDIAN)
2802 if (endian == DEVICE_LITTLE_ENDIAN) {
2803 val = bswap64(val);
2804 }
2805#else
2806 if (endian == DEVICE_BIG_ENDIAN) {
2807 val = bswap64(val);
2808 }
84b7b8e7
FB
2809#endif
2810 } else {
2811 /* RAM case */
5c8a00ce 2812 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2813 & TARGET_PAGE_MASK)
149f54b5 2814 + addr1);
1e78bcc1
AG
2815 switch (endian) {
2816 case DEVICE_LITTLE_ENDIAN:
2817 val = ldq_le_p(ptr);
2818 break;
2819 case DEVICE_BIG_ENDIAN:
2820 val = ldq_be_p(ptr);
2821 break;
2822 default:
2823 val = ldq_p(ptr);
2824 break;
2825 }
50013115
PM
2826 r = MEMTX_OK;
2827 }
2828 if (result) {
2829 *result = r;
84b7b8e7
FB
2830 }
2831 return val;
2832}
2833
50013115
PM
2834uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
2835 MemTxAttrs attrs, MemTxResult *result)
2836{
2837 return address_space_ldq_internal(as, addr, attrs, result,
2838 DEVICE_NATIVE_ENDIAN);
2839}
2840
2841uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
2842 MemTxAttrs attrs, MemTxResult *result)
2843{
2844 return address_space_ldq_internal(as, addr, attrs, result,
2845 DEVICE_LITTLE_ENDIAN);
2846}
2847
2848uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
2849 MemTxAttrs attrs, MemTxResult *result)
2850{
2851 return address_space_ldq_internal(as, addr, attrs, result,
2852 DEVICE_BIG_ENDIAN);
2853}
2854
2c17449b 2855uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2856{
50013115 2857 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
2858}
2859
2c17449b 2860uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2861{
50013115 2862 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
2863}
2864
2c17449b 2865uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2866{
50013115 2867 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
2868}
2869
aab33094 2870/* XXX: optimize */
50013115
PM
2871uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
2872 MemTxAttrs attrs, MemTxResult *result)
aab33094
FB
2873{
2874 uint8_t val;
50013115
PM
2875 MemTxResult r;
2876
2877 r = address_space_rw(as, addr, attrs, &val, 1, 0);
2878 if (result) {
2879 *result = r;
2880 }
aab33094
FB
2881 return val;
2882}
2883
50013115
PM
2884uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2885{
2886 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2887}
2888
733f0b02 2889/* warning: addr must be aligned */
50013115
PM
2890static inline uint32_t address_space_lduw_internal(AddressSpace *as,
2891 hwaddr addr,
2892 MemTxAttrs attrs,
2893 MemTxResult *result,
2894 enum device_endian endian)
aab33094 2895{
733f0b02
MT
2896 uint8_t *ptr;
2897 uint64_t val;
5c8a00ce 2898 MemoryRegion *mr;
149f54b5
PB
2899 hwaddr l = 2;
2900 hwaddr addr1;
50013115 2901 MemTxResult r;
733f0b02 2902
41701aa4 2903 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2904 false);
2905 if (l < 2 || !memory_access_is_direct(mr, false)) {
733f0b02 2906 /* I/O case */
50013115 2907 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
1e78bcc1
AG
2908#if defined(TARGET_WORDS_BIGENDIAN)
2909 if (endian == DEVICE_LITTLE_ENDIAN) {
2910 val = bswap16(val);
2911 }
2912#else
2913 if (endian == DEVICE_BIG_ENDIAN) {
2914 val = bswap16(val);
2915 }
2916#endif
733f0b02
MT
2917 } else {
2918 /* RAM case */
5c8a00ce 2919 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2920 & TARGET_PAGE_MASK)
149f54b5 2921 + addr1);
1e78bcc1
AG
2922 switch (endian) {
2923 case DEVICE_LITTLE_ENDIAN:
2924 val = lduw_le_p(ptr);
2925 break;
2926 case DEVICE_BIG_ENDIAN:
2927 val = lduw_be_p(ptr);
2928 break;
2929 default:
2930 val = lduw_p(ptr);
2931 break;
2932 }
50013115
PM
2933 r = MEMTX_OK;
2934 }
2935 if (result) {
2936 *result = r;
733f0b02
MT
2937 }
2938 return val;
aab33094
FB
2939}
2940
50013115
PM
2941uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
2942 MemTxAttrs attrs, MemTxResult *result)
2943{
2944 return address_space_lduw_internal(as, addr, attrs, result,
2945 DEVICE_NATIVE_ENDIAN);
2946}
2947
2948uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
2949 MemTxAttrs attrs, MemTxResult *result)
2950{
2951 return address_space_lduw_internal(as, addr, attrs, result,
2952 DEVICE_LITTLE_ENDIAN);
2953}
2954
2955uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
2956 MemTxAttrs attrs, MemTxResult *result)
2957{
2958 return address_space_lduw_internal(as, addr, attrs, result,
2959 DEVICE_BIG_ENDIAN);
2960}
2961
41701aa4 2962uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2963{
50013115 2964 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
2965}
2966
41701aa4 2967uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2968{
50013115 2969 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
2970}
2971
41701aa4 2972uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2973{
50013115 2974 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
2975}
2976
8df1cd07
FB
 2977/* warning: addr must be aligned. The ram page is not marked as dirty
2978 and the code inside is not invalidated. It is useful if the dirty
2979 bits are used to track modified PTEs */
50013115
PM
2980void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
2981 MemTxAttrs attrs, MemTxResult *result)
8df1cd07 2982{
8df1cd07 2983 uint8_t *ptr;
5c8a00ce 2984 MemoryRegion *mr;
149f54b5
PB
2985 hwaddr l = 4;
2986 hwaddr addr1;
50013115 2987 MemTxResult r;
8df1cd07 2988
2198a121 2989 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2990 true);
2991 if (l < 4 || !memory_access_is_direct(mr, true)) {
50013115 2992 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
8df1cd07 2993 } else {
5c8a00ce 2994 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
5579c7f3 2995 ptr = qemu_get_ram_ptr(addr1);
8df1cd07 2996 stl_p(ptr, val);
74576198
AL
2997
2998 if (unlikely(in_migration)) {
a2cd8c85 2999 if (cpu_physical_memory_is_clean(addr1)) {
74576198
AL
3000 /* invalidate code */
3001 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3002 /* set dirty bit */
6886867e 3003 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
74576198
AL
3004 }
3005 }
50013115
PM
3006 r = MEMTX_OK;
3007 }
3008 if (result) {
3009 *result = r;
8df1cd07
FB
3010 }
3011}
3012
50013115
PM
3013void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3014{
3015 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3016}
3017
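/*
 * Illustrative sketch (not part of the original file): the kind of caller
 * the comment above has in mind.  A software page-table walker that sets
 * an "accessed" bit in a guest PTE uses the _notdirty variant so the
 * write neither goes through QEMU's dirty-memory bookkeeping nor
 * invalidates translated code for that page.  The PTE layout here is
 * hypothetical.
 */
static void example_set_pte_accessed(AddressSpace *as, hwaddr pte_addr,
                                     uint32_t pte, uint32_t accessed_bit)
{
    stl_phys_notdirty(as, pte_addr, pte | accessed_bit);
}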
8df1cd07 3018/* warning: addr must be aligned */
50013115
PM
3019static inline void address_space_stl_internal(AddressSpace *as,
3020 hwaddr addr, uint32_t val,
3021 MemTxAttrs attrs,
3022 MemTxResult *result,
3023 enum device_endian endian)
8df1cd07 3024{
8df1cd07 3025 uint8_t *ptr;
5c8a00ce 3026 MemoryRegion *mr;
149f54b5
PB
3027 hwaddr l = 4;
3028 hwaddr addr1;
50013115 3029 MemTxResult r;
8df1cd07 3030
ab1da857 3031 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
3032 true);
3033 if (l < 4 || !memory_access_is_direct(mr, true)) {
1e78bcc1
AG
3034#if defined(TARGET_WORDS_BIGENDIAN)
3035 if (endian == DEVICE_LITTLE_ENDIAN) {
3036 val = bswap32(val);
3037 }
3038#else
3039 if (endian == DEVICE_BIG_ENDIAN) {
3040 val = bswap32(val);
3041 }
3042#endif
50013115 3043 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
8df1cd07 3044 } else {
8df1cd07 3045 /* RAM case */
5c8a00ce 3046 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
5579c7f3 3047 ptr = qemu_get_ram_ptr(addr1);
1e78bcc1
AG
3048 switch (endian) {
3049 case DEVICE_LITTLE_ENDIAN:
3050 stl_le_p(ptr, val);
3051 break;
3052 case DEVICE_BIG_ENDIAN:
3053 stl_be_p(ptr, val);
3054 break;
3055 default:
3056 stl_p(ptr, val);
3057 break;
3058 }
51d7a9eb 3059 invalidate_and_set_dirty(addr1, 4);
50013115
PM
3060 r = MEMTX_OK;
3061 }
3062 if (result) {
3063 *result = r;
8df1cd07
FB
3064 }
3065}
3066
50013115
PM
3067void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3068 MemTxAttrs attrs, MemTxResult *result)
3069{
3070 address_space_stl_internal(as, addr, val, attrs, result,
3071 DEVICE_NATIVE_ENDIAN);
3072}
3073
3074void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3075 MemTxAttrs attrs, MemTxResult *result)
3076{
3077 address_space_stl_internal(as, addr, val, attrs, result,
3078 DEVICE_LITTLE_ENDIAN);
3079}
3080
3081void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3082 MemTxAttrs attrs, MemTxResult *result)
3083{
3084 address_space_stl_internal(as, addr, val, attrs, result,
3085 DEVICE_BIG_ENDIAN);
3086}
3087
ab1da857 3088void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 3089{
50013115 3090 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
3091}
3092
ab1da857 3093void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 3094{
50013115 3095 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
3096}
3097
ab1da857 3098void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 3099{
50013115 3100 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
3101}
3102
aab33094 3103/* XXX: optimize */
50013115
PM
3104void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3105 MemTxAttrs attrs, MemTxResult *result)
aab33094
FB
3106{
3107 uint8_t v = val;
50013115
PM
3108 MemTxResult r;
3109
3110 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3111 if (result) {
3112 *result = r;
3113 }
3114}
3115
3116void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3117{
3118 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
aab33094
FB
3119}
3120
733f0b02 3121/* warning: addr must be aligned */
50013115
PM
3122static inline void address_space_stw_internal(AddressSpace *as,
3123 hwaddr addr, uint32_t val,
3124 MemTxAttrs attrs,
3125 MemTxResult *result,
3126 enum device_endian endian)
aab33094 3127{
733f0b02 3128 uint8_t *ptr;
5c8a00ce 3129 MemoryRegion *mr;
149f54b5
PB
3130 hwaddr l = 2;
3131 hwaddr addr1;
50013115 3132 MemTxResult r;
733f0b02 3133
5ce5944d 3134 mr = address_space_translate(as, addr, &addr1, &l, true);
5c8a00ce 3135 if (l < 2 || !memory_access_is_direct(mr, true)) {
1e78bcc1
AG
3136#if defined(TARGET_WORDS_BIGENDIAN)
3137 if (endian == DEVICE_LITTLE_ENDIAN) {
3138 val = bswap16(val);
3139 }
3140#else
3141 if (endian == DEVICE_BIG_ENDIAN) {
3142 val = bswap16(val);
3143 }
3144#endif
50013115 3145 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
733f0b02 3146 } else {
733f0b02 3147 /* RAM case */
5c8a00ce 3148 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
733f0b02 3149 ptr = qemu_get_ram_ptr(addr1);
1e78bcc1
AG
3150 switch (endian) {
3151 case DEVICE_LITTLE_ENDIAN:
3152 stw_le_p(ptr, val);
3153 break;
3154 case DEVICE_BIG_ENDIAN:
3155 stw_be_p(ptr, val);
3156 break;
3157 default:
3158 stw_p(ptr, val);
3159 break;
3160 }
51d7a9eb 3161 invalidate_and_set_dirty(addr1, 2);
50013115
PM
3162 r = MEMTX_OK;
3163 }
3164 if (result) {
3165 *result = r;
733f0b02 3166 }
aab33094
FB
3167}
3168
50013115
PM
3169void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3170 MemTxAttrs attrs, MemTxResult *result)
3171{
3172 address_space_stw_internal(as, addr, val, attrs, result,
3173 DEVICE_NATIVE_ENDIAN);
3174}
3175
3176void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3177 MemTxAttrs attrs, MemTxResult *result)
3178{
3179 address_space_stw_internal(as, addr, val, attrs, result,
3180 DEVICE_LITTLE_ENDIAN);
3181}
3182
3183void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3184 MemTxAttrs attrs, MemTxResult *result)
3185{
3186 address_space_stw_internal(as, addr, val, attrs, result,
3187 DEVICE_BIG_ENDIAN);
3188}
3189
5ce5944d 3190void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 3191{
50013115 3192 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
3193}
3194
5ce5944d 3195void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 3196{
50013115 3197 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
3198}
3199
5ce5944d 3200void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 3201{
50013115 3202 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
3203}
3204
aab33094 3205/* XXX: optimize */
50013115
PM
3206void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3207 MemTxAttrs attrs, MemTxResult *result)
aab33094 3208{
50013115 3209 MemTxResult r;
aab33094 3210 val = tswap64(val);
50013115
PM
3211 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3212 if (result) {
3213 *result = r;
3214 }
aab33094
FB
3215}
3216
50013115
PM
3217void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3218 MemTxAttrs attrs, MemTxResult *result)
1e78bcc1 3219{
50013115 3220 MemTxResult r;
1e78bcc1 3221 val = cpu_to_le64(val);
50013115
PM
3222 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3223 if (result) {
3224 *result = r;
3225 }
3226}
3227void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3228 MemTxAttrs attrs, MemTxResult *result)
3229{
3230 MemTxResult r;
3231 val = cpu_to_be64(val);
3232 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3233 if (result) {
3234 *result = r;
3235 }
3236}
3237
3238void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3239{
3240 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3241}
3242
3243void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3244{
3245 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
3246}
3247
f606604f 3248void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
1e78bcc1 3249{
50013115 3250 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
1e78bcc1
AG
3251}
3252
5e2972fd 3253/* virtual memory access for debug (includes writing to ROM) */
f17ec444 3254int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
b448f2f3 3255 uint8_t *buf, int len, int is_write)
13eb76e0
FB
3256{
3257 int l;
a8170e5e 3258 hwaddr phys_addr;
9b3c35e0 3259 target_ulong page;
13eb76e0
FB
3260
3261 while (len > 0) {
3262 page = addr & TARGET_PAGE_MASK;
f17ec444 3263 phys_addr = cpu_get_phys_page_debug(cpu, page);
13eb76e0
FB
3264 /* if no physical page mapped, return an error */
3265 if (phys_addr == -1)
3266 return -1;
3267 l = (page + TARGET_PAGE_SIZE) - addr;
3268 if (l > len)
3269 l = len;
5e2972fd 3270 phys_addr += (addr & ~TARGET_PAGE_MASK);
2e38847b
EI
3271 if (is_write) {
3272 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3273 } else {
5c9eb028
PM
3274 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
3275 buf, l, 0);
2e38847b 3276 }
13eb76e0
FB
3277 len -= l;
3278 buf += l;
3279 addr += l;
3280 }
3281 return 0;
3282}
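/*
 * Illustrative sketch (not part of the original file): how a gdbstub-like
 * caller reads guest virtual memory with the helper above.  The address is
 * translated page by page via cpu_get_phys_page_debug(), so this works
 * even while the guest is stopped.  "cpu" is assumed to be a valid vCPU.
 */
static bool example_peek_guest_u32(CPUState *cpu, target_ulong vaddr,
                                   uint32_t *val)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(cpu, vaddr, buf, sizeof(buf), 0) < 0) {
        return false;        /* page not mapped */
    }
    *val = ldl_p(buf);
    return true;
}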
a68fe89c 3283#endif
13eb76e0 3284
8e4a424b
BS
3285/*
3286 * A helper function for the _utterly broken_ virtio device model to find out if
3287 * it's running on a big endian machine. Don't do this at home kids!
3288 */
98ed8ecf
GK
3289bool target_words_bigendian(void);
3290bool target_words_bigendian(void)
8e4a424b
BS
3291{
3292#if defined(TARGET_WORDS_BIGENDIAN)
3293 return true;
3294#else
3295 return false;
3296#endif
3297}
3298
76f35538 3299#ifndef CONFIG_USER_ONLY
a8170e5e 3300bool cpu_physical_memory_is_io(hwaddr phys_addr)
76f35538 3301{
5c8a00ce 3302 MemoryRegion *mr;
149f54b5 3303 hwaddr l = 1;
76f35538 3304
5c8a00ce
PB
3305 mr = address_space_translate(&address_space_memory,
3306 phys_addr, &phys_addr, &l, false);
76f35538 3307
5c8a00ce
PB
3308 return !(memory_region_is_ram(mr) ||
3309 memory_region_is_romd(mr));
76f35538 3310}
bd2fa51f
MH
3311
3312void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3313{
3314 RAMBlock *block;
3315
0dc3f44a
MD
3316 rcu_read_lock();
3317 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5 3318 func(block->host, block->offset, block->used_length, opaque);
bd2fa51f 3319 }
0dc3f44a 3320 rcu_read_unlock();
bd2fa51f 3321}
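/*
 * Illustrative sketch (not part of the original file): a callback for
 * qemu_ram_foreach_block() that adds up the used size of every RAMBlock.
 * The (host pointer, offset, length, opaque) callback signature matches
 * how the iterator above invokes func(); treat it as an assumption if
 * RAMBlockIterFunc is declared differently in your tree.
 */
static void example_count_ram(void *host_addr, ram_addr_t offset,
                              ram_addr_t length, void *opaque)
{
    uint64_t *total = opaque;

    *total += length;
}

static uint64_t example_total_ram_bytes(void)
{
    uint64_t total = 0;

    qemu_ram_foreach_block(example_count_ram, &total);
    return total;
}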
ec3f8c99 3322#endif