[mirror_qemu.git] / exec.c
54936004 1/*
5b6dd868 2 * Virtual page mapping
5fafdf24 3 *
54936004 FB 4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
8167ee88 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
54936004 18 */
67b915a5 19#include "config.h"
777872e5 20#ifndef _WIN32
a98d49b1 21#include <sys/types.h>
d5a8f07c FB 22#include <sys/mman.h>
23#endif
54936004 24
055403b2 25#include "qemu-common.h"
6180a181 26#include "cpu.h"
b67d9a52 27#include "tcg.h"
b3c7724c 28#include "hw/hw.h"
4485bd26 29#if !defined(CONFIG_USER_ONLY)
47c8ca53 30#include "hw/boards.h"
4485bd26 31#endif
cc9e98cb 32#include "hw/qdev.h"
1de7afc9 33#include "qemu/osdep.h"
9c17d615 34#include "sysemu/kvm.h"
2ff3de68 35#include "sysemu/sysemu.h"
0d09e41a 36#include "hw/xen/xen.h"
1de7afc9 PB 37#include "qemu/timer.h"
38#include "qemu/config-file.h"
75a34036 39#include "qemu/error-report.h"
022c62cb 40#include "exec/memory.h"
9c17d615 41#include "sysemu/dma.h"
022c62cb 42#include "exec/address-spaces.h"
53a5960a PB 43#if defined(CONFIG_USER_ONLY)
44#include <qemu.h>
432d268c 45#else /* !CONFIG_USER_ONLY */
9c17d615 46#include "sysemu/xen-mapcache.h"
6506e4f9 47#include "trace.h"
53a5960a 48#endif
0d6d3c87 49#include "exec/cpu-all.h"
0dc3f44a 50#include "qemu/rcu_queue.h"
022c62cb 51#include "exec/cputlb.h"
5b6dd868 52#include "translate-all.h"
0cac1b66 53
022c62cb 54#include "exec/memory-internal.h"
220c3ebd 55#include "exec/ram_addr.h"
67d95c15 56
b35ba30f MT 57#include "qemu/range.h"
58
db7b5426 59//#define DEBUG_SUBPAGE
1196be37 60
e2eef170 61#if !defined(CONFIG_USER_ONLY)
981fdf23 62static bool in_migration;
94a6b54f 63
0dc3f44a MD 64/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
65 * are protected by the ramlist lock.
66 */
0d53d9fe 67RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
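The comment above states the locking rule; as a minimal illustrative sketch (not itself part of exec.c), the reader side looks like the pattern used by helpers such as last_ram_offset() further down:

    RAMBlock *block;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        /* read-only inspection of 'block'; modifying the list instead
         * requires the ramlist mutex (qemu_mutex_lock_ramlist()). */
    }
    rcu_read_unlock();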
62152b8a AK 68
69static MemoryRegion *system_memory;
309cb471 70static MemoryRegion *system_io;
62152b8a 71
f6790af6 AK 72AddressSpace address_space_io;
73AddressSpace address_space_memory;
2673a5da 74
0844e007 75MemoryRegion io_mem_rom, io_mem_notdirty;
acc9d80b 76static MemoryRegion io_mem_unassigned;
0e0df1e2 77
7bd4f430 PB 78/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
79#define RAM_PREALLOC (1 << 0)
80
dbcb8981 PB 81/* RAM is mmap-ed with MAP_SHARED */
82#define RAM_SHARED (1 << 1)
83
62be4e3a MT 84/* Only a portion of RAM (used_length) is actually used, and migrated.
85 * This used_length size can change across reboots.
86 */
87#define RAM_RESIZEABLE (1 << 2)
88
e2eef170 89#endif
9fa3e853 90
bdc44640 91struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
6a00d601 FB 92/* current CPU in the current thread. It is only valid inside
93 cpu_exec() */
4917cf44 94DEFINE_TLS(CPUState *, current_cpu);
2e70f6ef 95/* 0 = Do not count executed instructions.
bf20dc07 96 1 = Precise instruction counting.
2e70f6ef 97 2 = Adaptive rate instruction counting. */
5708fc66 98int use_icount;
6a00d601 99
e2eef170 100#if !defined(CONFIG_USER_ONLY)
4346ae3e 101
1db8abb1 PB 102typedef struct PhysPageEntry PhysPageEntry;
103
104struct PhysPageEntry {
9736e55b 105 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
8b795765 106 uint32_t skip : 6;
9736e55b 107 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
8b795765 108 uint32_t ptr : 26;
1db8abb1
PB
109};
110
8b795765
MT
111#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
112
03f49957 113/* Size of the L2 (and L3, etc) page tables. */
57271d63 114#define ADDR_SPACE_BITS 64
03f49957 115
026736ce 116#define P_L2_BITS 9
03f49957
PB
117#define P_L2_SIZE (1 << P_L2_BITS)
118
119#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
120
121typedef PhysPageEntry Node[P_L2_SIZE];
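A worked example of the sizing implied by the macros above, assuming TARGET_PAGE_BITS is 12 (4 KiB target pages; other targets differ):

    /*
     * ADDR_SPACE_BITS = 64, TARGET_PAGE_BITS = 12, P_L2_BITS = 9:
     *   P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 5 + 1 = 6
     * so a lookup walks at most six 512-entry Nodes, and each PhysPageEntry
     * spends 6 bits on 'skip' and 26 bits on 'ptr' (a Node or section index).
     */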
0475d94f 122
53cb28cb 123typedef struct PhysPageMap {
79e2b9ae
PB
124 struct rcu_head rcu;
125
53cb28cb
MA
126 unsigned sections_nb;
127 unsigned sections_nb_alloc;
128 unsigned nodes_nb;
129 unsigned nodes_nb_alloc;
130 Node *nodes;
131 MemoryRegionSection *sections;
132} PhysPageMap;
133
1db8abb1 134struct AddressSpaceDispatch {
79e2b9ae
PB
135 struct rcu_head rcu;
136
1db8abb1
PB
137 /* This is a multi-level map on the physical address space.
138 * The bottom level has pointers to MemoryRegionSections.
139 */
140 PhysPageEntry phys_map;
53cb28cb 141 PhysPageMap map;
acc9d80b 142 AddressSpace *as;
1db8abb1
PB
143};
144
90260c6c
JK
145#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
146typedef struct subpage_t {
147 MemoryRegion iomem;
acc9d80b 148 AddressSpace *as;
90260c6c
JK
149 hwaddr base;
150 uint16_t sub_section[TARGET_PAGE_SIZE];
151} subpage_t;
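To make the per-byte subpage table above concrete, here is an illustrative case (4 KiB target pages assumed; the addresses are made up):

    /*
     * A section covering [0x1000200, 0x1000300) does not fill a target page,
     * so the page at base 0x1000000 gets a subpage_t: subpage_register() fills
     * sub_section[0x200] .. sub_section[0x2ff] with that section's index
     * (SUBPAGE_IDX(0x1000250) == 0x250 lands inside it), while the remaining
     * offsets still resolve to the unassigned section.
     */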
152
b41aac4f
LPF
153#define PHYS_SECTION_UNASSIGNED 0
154#define PHYS_SECTION_NOTDIRTY 1
155#define PHYS_SECTION_ROM 2
156#define PHYS_SECTION_WATCH 3
5312bd8b 157
e2eef170 158static void io_mem_init(void);
62152b8a 159static void memory_map_init(void);
09daed84 160static void tcg_commit(MemoryListener *listener);
e2eef170 161
1ec9b909 162static MemoryRegion io_mem_watch;
6658ffb8 163#endif
fd6ce8f6 164
6d9a1304 165#if !defined(CONFIG_USER_ONLY)
d6f2ea22 166
53cb28cb 167static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
d6f2ea22 168{
53cb28cb
MA
169 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
170 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
171 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
172 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
d6f2ea22 173 }
f7bf5461
AK
174}
175
53cb28cb 176static uint32_t phys_map_node_alloc(PhysPageMap *map)
f7bf5461
AK
177{
178 unsigned i;
8b795765 179 uint32_t ret;
f7bf5461 180
53cb28cb 181 ret = map->nodes_nb++;
f7bf5461 182 assert(ret != PHYS_MAP_NODE_NIL);
53cb28cb 183 assert(ret != map->nodes_nb_alloc);
03f49957 184 for (i = 0; i < P_L2_SIZE; ++i) {
53cb28cb
MA
185 map->nodes[ret][i].skip = 1;
186 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
d6f2ea22 187 }
f7bf5461 188 return ret;
d6f2ea22
AK
189}
190
53cb28cb
MA
191static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
192 hwaddr *index, hwaddr *nb, uint16_t leaf,
2999097b 193 int level)
f7bf5461
AK
194{
195 PhysPageEntry *p;
196 int i;
03f49957 197 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
108c49b8 198
9736e55b 199 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
53cb28cb
MA
200 lp->ptr = phys_map_node_alloc(map);
201 p = map->nodes[lp->ptr];
f7bf5461 202 if (level == 0) {
03f49957 203 for (i = 0; i < P_L2_SIZE; i++) {
9736e55b 204 p[i].skip = 0;
b41aac4f 205 p[i].ptr = PHYS_SECTION_UNASSIGNED;
4346ae3e 206 }
67c4d23c 207 }
f7bf5461 208 } else {
53cb28cb 209 p = map->nodes[lp->ptr];
92e873b9 210 }
03f49957 211 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
f7bf5461 212
03f49957 213 while (*nb && lp < &p[P_L2_SIZE]) {
07f07b31 214 if ((*index & (step - 1)) == 0 && *nb >= step) {
9736e55b 215 lp->skip = 0;
c19e8800 216 lp->ptr = leaf;
07f07b31
AK
217 *index += step;
218 *nb -= step;
2999097b 219 } else {
53cb28cb 220 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
2999097b
AK
221 }
222 ++lp;
f7bf5461
AK
223 }
224}
225
ac1970fb 226static void phys_page_set(AddressSpaceDispatch *d,
a8170e5e 227 hwaddr index, hwaddr nb,
2999097b 228 uint16_t leaf)
f7bf5461 229{
2999097b 230 /* Wildly overreserve - it doesn't matter much. */
53cb28cb 231 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
5cd2c5b6 232
53cb28cb 233 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
92e873b9
FB
234}
235
b35ba30f
MT
236/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
237 * and update our entry so we can skip it and go directly to the destination.
238 */
239static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
240{
241 unsigned valid_ptr = P_L2_SIZE;
242 int valid = 0;
243 PhysPageEntry *p;
244 int i;
245
246 if (lp->ptr == PHYS_MAP_NODE_NIL) {
247 return;
248 }
249
250 p = nodes[lp->ptr];
251 for (i = 0; i < P_L2_SIZE; i++) {
252 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
253 continue;
254 }
255
256 valid_ptr = i;
257 valid++;
258 if (p[i].skip) {
259 phys_page_compact(&p[i], nodes, compacted);
260 }
261 }
262
263 /* We can only compress if there's only one child. */
264 if (valid != 1) {
265 return;
266 }
267
268 assert(valid_ptr < P_L2_SIZE);
269
270 /* Don't compress if it won't fit in the # of bits we have. */
271 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
272 return;
273 }
274
275 lp->ptr = p[valid_ptr].ptr;
276 if (!p[valid_ptr].skip) {
277 /* If our only child is a leaf, make this a leaf. */
278 /* By design, we should have made this node a leaf to begin with so we
279 * should never reach here.
280 * But since it's so simple to handle this, let's do it just in case we
281 * change this rule.
282 */
283 lp->skip = 0;
284 } else {
285 lp->skip += p[valid_ptr].skip;
286 }
287}
288
289static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
290{
291 DECLARE_BITMAP(compacted, nodes_nb);
292
293 if (d->phys_map.skip) {
53cb28cb 294 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
b35ba30f
MT
295 }
296}
297
97115a8d 298static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
9affd6fc 299 Node *nodes, MemoryRegionSection *sections)
92e873b9 300{
31ab2b4a 301 PhysPageEntry *p;
97115a8d 302 hwaddr index = addr >> TARGET_PAGE_BITS;
31ab2b4a 303 int i;
f1f6e3b8 304
9736e55b 305 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
c19e8800 306 if (lp.ptr == PHYS_MAP_NODE_NIL) {
9affd6fc 307 return &sections[PHYS_SECTION_UNASSIGNED];
31ab2b4a 308 }
9affd6fc 309 p = nodes[lp.ptr];
03f49957 310 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
5312bd8b 311 }
b35ba30f
MT
312
313 if (sections[lp.ptr].size.hi ||
314 range_covers_byte(sections[lp.ptr].offset_within_address_space,
315 sections[lp.ptr].size.lo, addr)) {
316 return &sections[lp.ptr];
317 } else {
318 return &sections[PHYS_SECTION_UNASSIGNED];
319 }
f3705d53
AK
320}
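A short sketch of the index arithmetic used by the walk above, with illustrative numbers (TARGET_PAGE_BITS = 12 and P_L2_BITS = 9 assumed):

    /*
     * addr = 0x1234567000  =>  index = addr >> 12 = 0x1234567
     * level i consumes bits [9*i, 9*i + 8] of 'index':
     *   slot at level 1 = (0x1234567 >> 9) & 0x1ff = 0x1a2
     *   slot at level 0 = (0x1234567 >> 0) & 0x1ff = 0x167
     * A non-zero 'skip' jumps over intermediate levels whose nodes would only
     * have had a single child, which is what phys_page_compact() arranges.
     */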
321
e5548617
BS
322bool memory_region_is_unassigned(MemoryRegion *mr)
323{
2a8e7499 324 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
5b6dd868 325 && mr != &io_mem_watch;
fd6ce8f6 326}
149f54b5 327
79e2b9ae 328/* Called from RCU critical section */
c7086b4a 329static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
90260c6c
JK
330 hwaddr addr,
331 bool resolve_subpage)
9f029603 332{
90260c6c
JK
333 MemoryRegionSection *section;
334 subpage_t *subpage;
335
53cb28cb 336 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
90260c6c
JK
337 if (resolve_subpage && section->mr->subpage) {
338 subpage = container_of(section->mr, subpage_t, iomem);
53cb28cb 339 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
90260c6c
JK
340 }
341 return section;
9f029603
JK
342}
343
79e2b9ae 344/* Called from RCU critical section */
90260c6c 345static MemoryRegionSection *
c7086b4a 346address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
90260c6c 347 hwaddr *plen, bool resolve_subpage)
149f54b5
PB
348{
349 MemoryRegionSection *section;
a87f3954 350 Int128 diff;
149f54b5 351
c7086b4a 352 section = address_space_lookup_region(d, addr, resolve_subpage);
149f54b5
PB
353 /* Compute offset within MemoryRegionSection */
354 addr -= section->offset_within_address_space;
355
356 /* Compute offset within MemoryRegion */
357 *xlat = addr + section->offset_within_region;
358
359 diff = int128_sub(section->mr->size, int128_make64(addr));
3752a036 360 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
149f54b5
PB
361 return section;
362}
90260c6c 363
a87f3954
PB
364static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
365{
366 if (memory_region_is_ram(mr)) {
367 return !(is_write && mr->readonly);
368 }
369 if (memory_region_is_romd(mr)) {
370 return !is_write;
371 }
372
373 return false;
374}
375
5c8a00ce
PB
376MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
377 hwaddr *xlat, hwaddr *plen,
378 bool is_write)
90260c6c 379{
30951157
AK
380 IOMMUTLBEntry iotlb;
381 MemoryRegionSection *section;
382 MemoryRegion *mr;
4025446f 383 hwaddr len = *plen;
30951157 384
79e2b9ae 385 rcu_read_lock();
30951157 386 for (;;) {
79e2b9ae
PB
387 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
388 section = address_space_translate_internal(d, addr, &addr, plen, true);
30951157
AK
389 mr = section->mr;
390
391 if (!mr->iommu_ops) {
392 break;
393 }
394
8d7b8cb9 395 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
30951157
AK
396 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
397 | (addr & iotlb.addr_mask));
4025446f 398 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
30951157
AK
399 if (!(iotlb.perm & (1 << is_write))) {
400 mr = &io_mem_unassigned;
401 break;
402 }
403
404 as = iotlb.target_as;
405 }
406
fe680d0d 407 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
a87f3954 408 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
4025446f 409 len = MIN(page, len);
a87f3954
PB
410 }
411
4025446f 412 *plen = len;
30951157 413 *xlat = addr;
79e2b9ae 414 rcu_read_unlock();
30951157 415 return mr;
90260c6c
JK
416}
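A minimal caller-side sketch of the translate-then-access pattern, mirroring what callers such as address_space_rw (later in this file) do; as/addr/len/buf are placeholder variables:

    hwaddr xlat, l = len;
    MemoryRegion *mr = address_space_translate(as, addr, &xlat, &l, false);

    if (memory_access_is_direct(mr, false)) {
        /* RAM or ROMD: read up to 'l' bytes straight from host memory */
        void *ptr = qemu_get_ram_ptr(memory_region_get_ram_addr(mr) + xlat);
        memcpy(buf, ptr, l);
    } else {
        /* MMIO: the access must go through the region's read callbacks */
    }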
417
79e2b9ae 418/* Called from RCU critical section */
90260c6c 419MemoryRegionSection *
9d82b5a7
PB
420address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
421 hwaddr *xlat, hwaddr *plen)
90260c6c 422{
30951157 423 MemoryRegionSection *section;
9d82b5a7
PB
424 section = address_space_translate_internal(cpu->memory_dispatch,
425 addr, xlat, plen, false);
30951157
AK
426
427 assert(!section->mr->iommu_ops);
428 return section;
90260c6c 429}
5b6dd868 430#endif
fd6ce8f6 431
b170fce3 432#if !defined(CONFIG_USER_ONLY)
5b6dd868
BS
433
434static int cpu_common_post_load(void *opaque, int version_id)
fd6ce8f6 435{
259186a7 436 CPUState *cpu = opaque;
a513fe19 437
5b6dd868
BS
438 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
439 version_id is increased. */
259186a7 440 cpu->interrupt_request &= ~0x01;
c01a71c1 441 tlb_flush(cpu, 1);
5b6dd868
BS
442
443 return 0;
a513fe19 444}
7501267e 445
6c3bff0e
PD
446static int cpu_common_pre_load(void *opaque)
447{
448 CPUState *cpu = opaque;
449
adee6424 450 cpu->exception_index = -1;
6c3bff0e
PD
451
452 return 0;
453}
454
455static bool cpu_common_exception_index_needed(void *opaque)
456{
457 CPUState *cpu = opaque;
458
adee6424 459 return tcg_enabled() && cpu->exception_index != -1;
6c3bff0e
PD
460}
461
462static const VMStateDescription vmstate_cpu_common_exception_index = {
463 .name = "cpu_common/exception_index",
464 .version_id = 1,
465 .minimum_version_id = 1,
466 .fields = (VMStateField[]) {
467 VMSTATE_INT32(exception_index, CPUState),
468 VMSTATE_END_OF_LIST()
469 }
470};
471
1a1562f5 472const VMStateDescription vmstate_cpu_common = {
5b6dd868
BS
473 .name = "cpu_common",
474 .version_id = 1,
475 .minimum_version_id = 1,
6c3bff0e 476 .pre_load = cpu_common_pre_load,
5b6dd868 477 .post_load = cpu_common_post_load,
35d08458 478 .fields = (VMStateField[]) {
259186a7
AF
479 VMSTATE_UINT32(halted, CPUState),
480 VMSTATE_UINT32(interrupt_request, CPUState),
5b6dd868 481 VMSTATE_END_OF_LIST()
6c3bff0e
PD
482 },
483 .subsections = (VMStateSubsection[]) {
484 {
485 .vmsd = &vmstate_cpu_common_exception_index,
486 .needed = cpu_common_exception_index_needed,
487 } , {
488 /* empty */
489 }
5b6dd868
BS
490 }
491};
1a1562f5 492
5b6dd868 493#endif
ea041c0e 494
38d8f5c8 495CPUState *qemu_get_cpu(int index)
ea041c0e 496{
bdc44640 497 CPUState *cpu;
ea041c0e 498
bdc44640 499 CPU_FOREACH(cpu) {
55e5c285 500 if (cpu->cpu_index == index) {
bdc44640 501 return cpu;
55e5c285 502 }
ea041c0e 503 }
5b6dd868 504
bdc44640 505 return NULL;
ea041c0e
FB
506}
507
09daed84
EI
508#if !defined(CONFIG_USER_ONLY)
509void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
510{
511 /* We only support one address space per cpu at the moment. */
512 assert(cpu->as == as);
513
514 if (cpu->tcg_as_listener) {
515 memory_listener_unregister(cpu->tcg_as_listener);
516 } else {
517 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
518 }
519 cpu->tcg_as_listener->commit = tcg_commit;
520 memory_listener_register(cpu->tcg_as_listener, as);
521}
522#endif
523
5b6dd868 524void cpu_exec_init(CPUArchState *env)
ea041c0e 525{
5b6dd868 526 CPUState *cpu = ENV_GET_CPU(env);
b170fce3 527 CPUClass *cc = CPU_GET_CLASS(cpu);
bdc44640 528 CPUState *some_cpu;
5b6dd868
BS
529 int cpu_index;
530
531#if defined(CONFIG_USER_ONLY)
532 cpu_list_lock();
533#endif
5b6dd868 534 cpu_index = 0;
bdc44640 535 CPU_FOREACH(some_cpu) {
5b6dd868
BS
536 cpu_index++;
537 }
55e5c285 538 cpu->cpu_index = cpu_index;
1b1ed8dc 539 cpu->numa_node = 0;
f0c3c505 540 QTAILQ_INIT(&cpu->breakpoints);
ff4700b0 541 QTAILQ_INIT(&cpu->watchpoints);
5b6dd868 542#ifndef CONFIG_USER_ONLY
09daed84 543 cpu->as = &address_space_memory;
5b6dd868 544 cpu->thread_id = qemu_get_thread_id();
cba70549 545 cpu_reload_memory_map(cpu);
5b6dd868 546#endif
bdc44640 547 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
5b6dd868
BS
548#if defined(CONFIG_USER_ONLY)
549 cpu_list_unlock();
550#endif
e0d47944
AF
551 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
552 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
553 }
5b6dd868 554#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
5b6dd868
BS
555 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
556 cpu_save, cpu_load, env);
b170fce3 557 assert(cc->vmsd == NULL);
e0d47944 558 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
5b6dd868 559#endif
b170fce3
AF
560 if (cc->vmsd != NULL) {
561 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
562 }
ea041c0e
FB
563}
564
94df27fd 565#if defined(CONFIG_USER_ONLY)
00b941e5 566static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
94df27fd
PB
567{
568 tb_invalidate_phys_page_range(pc, pc + 1, 0);
569}
570#else
00b941e5 571static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
1e7855a5 572{
e8262a1b
MF
573 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
574 if (phys != -1) {
09daed84 575 tb_invalidate_phys_addr(cpu->as,
29d8ec7b 576 phys | (pc & ~TARGET_PAGE_MASK));
e8262a1b 577 }
1e7855a5 578}
c27004ec 579#endif
d720b93d 580
c527ee8f 581#if defined(CONFIG_USER_ONLY)
75a34036 582void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
c527ee8f
PB
583
584{
585}
586
3ee887e8
PM
587int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
588 int flags)
589{
590 return -ENOSYS;
591}
592
593void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
594{
595}
596
75a34036 597int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
c527ee8f
PB
598 int flags, CPUWatchpoint **watchpoint)
599{
600 return -ENOSYS;
601}
602#else
6658ffb8 603/* Add a watchpoint. */
75a34036 604int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
a1d1bb31 605 int flags, CPUWatchpoint **watchpoint)
6658ffb8 606{
c0ce998e 607 CPUWatchpoint *wp;
6658ffb8 608
05068c0d 609 /* forbid ranges which are empty or run off the end of the address space */
07e2863d 610 if (len == 0 || (addr + len - 1) < addr) {
75a34036
AF
611 error_report("tried to set invalid watchpoint at %"
612 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
b4051334
AL
613 return -EINVAL;
614 }
7267c094 615 wp = g_malloc(sizeof(*wp));
a1d1bb31
AL
616
617 wp->vaddr = addr;
05068c0d 618 wp->len = len;
a1d1bb31
AL
619 wp->flags = flags;
620
2dc9f411 621 /* keep all GDB-injected watchpoints in front */
ff4700b0
AF
622 if (flags & BP_GDB) {
623 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
624 } else {
625 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
626 }
6658ffb8 627
31b030d4 628 tlb_flush_page(cpu, addr);
a1d1bb31
AL
629
630 if (watchpoint)
631 *watchpoint = wp;
632 return 0;
6658ffb8
PB
633}
634
a1d1bb31 635/* Remove a specific watchpoint. */
75a34036 636int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
a1d1bb31 637 int flags)
6658ffb8 638{
a1d1bb31 639 CPUWatchpoint *wp;
6658ffb8 640
ff4700b0 641 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d 642 if (addr == wp->vaddr && len == wp->len
6e140f28 643 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
75a34036 644 cpu_watchpoint_remove_by_ref(cpu, wp);
6658ffb8
PB
645 return 0;
646 }
647 }
a1d1bb31 648 return -ENOENT;
6658ffb8
PB
649}
650
a1d1bb31 651/* Remove a specific watchpoint by reference. */
75a34036 652void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
a1d1bb31 653{
ff4700b0 654 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
7d03f82f 655
31b030d4 656 tlb_flush_page(cpu, watchpoint->vaddr);
a1d1bb31 657
7267c094 658 g_free(watchpoint);
a1d1bb31
AL
659}
660
661/* Remove all matching watchpoints. */
75a34036 662void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
a1d1bb31 663{
c0ce998e 664 CPUWatchpoint *wp, *next;
a1d1bb31 665
ff4700b0 666 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
75a34036
AF
667 if (wp->flags & mask) {
668 cpu_watchpoint_remove_by_ref(cpu, wp);
669 }
c0ce998e 670 }
7d03f82f 671}
05068c0d
PM
672
673/* Return true if this watchpoint address matches the specified
674 * access (ie the address range covered by the watchpoint overlaps
675 * partially or completely with the address range covered by the
676 * access).
677 */
678static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
679 vaddr addr,
680 vaddr len)
681{
682 /* We know the lengths are non-zero, but a little caution is
683 * required to avoid errors in the case where the range ends
684 * exactly at the top of the address space and so addr + len
685 * wraps round to zero.
686 */
687 vaddr wpend = wp->vaddr + wp->len - 1;
688 vaddr addrend = addr + len - 1;
689
690 return !(addr > wpend || wp->vaddr > addrend);
691}
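A worked case for the wrap-around concern described in the comment above (illustrative 64-bit values):

    /*
     * wp->vaddr = 0xfffffffffffff000, wp->len = 0x1000:
     *   wpend = 0xffffffffffffffff     (wp->vaddr + wp->len would wrap to 0)
     * an access with addr = 0xffffffffffffff00, len = 0x100 gives
     *   addrend = 0xffffffffffffffff
     * so neither 'addr > wpend' nor 'wp->vaddr > addrend' holds and the
     * overlap is correctly reported.
     */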
692
c527ee8f 693#endif
7d03f82f 694
a1d1bb31 695/* Add a breakpoint. */
b3310ab3 696int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
a1d1bb31 697 CPUBreakpoint **breakpoint)
4c3a88a2 698{
c0ce998e 699 CPUBreakpoint *bp;
3b46e624 700
7267c094 701 bp = g_malloc(sizeof(*bp));
4c3a88a2 702
a1d1bb31
AL
703 bp->pc = pc;
704 bp->flags = flags;
705
2dc9f411 706 /* keep all GDB-injected breakpoints in front */
00b941e5 707 if (flags & BP_GDB) {
f0c3c505 708 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
00b941e5 709 } else {
f0c3c505 710 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
00b941e5 711 }
3b46e624 712
f0c3c505 713 breakpoint_invalidate(cpu, pc);
a1d1bb31 714
00b941e5 715 if (breakpoint) {
a1d1bb31 716 *breakpoint = bp;
00b941e5 717 }
4c3a88a2 718 return 0;
4c3a88a2
FB
719}
720
a1d1bb31 721/* Remove a specific breakpoint. */
b3310ab3 722int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
a1d1bb31 723{
a1d1bb31
AL
724 CPUBreakpoint *bp;
725
f0c3c505 726 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
a1d1bb31 727 if (bp->pc == pc && bp->flags == flags) {
b3310ab3 728 cpu_breakpoint_remove_by_ref(cpu, bp);
a1d1bb31
AL
729 return 0;
730 }
7d03f82f 731 }
a1d1bb31 732 return -ENOENT;
7d03f82f
EI
733}
734
a1d1bb31 735/* Remove a specific breakpoint by reference. */
b3310ab3 736void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
4c3a88a2 737{
f0c3c505
AF
738 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
739
740 breakpoint_invalidate(cpu, breakpoint->pc);
a1d1bb31 741
7267c094 742 g_free(breakpoint);
a1d1bb31
AL
743}
744
745/* Remove all matching breakpoints. */
b3310ab3 746void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
a1d1bb31 747{
c0ce998e 748 CPUBreakpoint *bp, *next;
a1d1bb31 749
f0c3c505 750 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
b3310ab3
AF
751 if (bp->flags & mask) {
752 cpu_breakpoint_remove_by_ref(cpu, bp);
753 }
c0ce998e 754 }
4c3a88a2
FB
755}
756
c33a346e
FB
757/* enable or disable single step mode. EXCP_DEBUG is returned by the
758 CPU loop after each instruction */
3825b28f 759void cpu_single_step(CPUState *cpu, int enabled)
c33a346e 760{
ed2803da
AF
761 if (cpu->singlestep_enabled != enabled) {
762 cpu->singlestep_enabled = enabled;
763 if (kvm_enabled()) {
38e478ec 764 kvm_update_guest_debug(cpu, 0);
ed2803da 765 } else {
ccbb4d44 766 /* must flush all the translated code to avoid inconsistencies */
e22a25c9 767 /* XXX: only flush what is necessary */
38e478ec 768 CPUArchState *env = cpu->env_ptr;
e22a25c9
AL
769 tb_flush(env);
770 }
c33a346e 771 }
c33a346e
FB
772}
773
a47dddd7 774void cpu_abort(CPUState *cpu, const char *fmt, ...)
7501267e
FB
775{
776 va_list ap;
493ae1f0 777 va_list ap2;
7501267e
FB
778
779 va_start(ap, fmt);
493ae1f0 780 va_copy(ap2, ap);
7501267e
FB
781 fprintf(stderr, "qemu: fatal: ");
782 vfprintf(stderr, fmt, ap);
783 fprintf(stderr, "\n");
878096ee 784 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
93fcfe39
AL
785 if (qemu_log_enabled()) {
786 qemu_log("qemu: fatal: ");
787 qemu_log_vprintf(fmt, ap2);
788 qemu_log("\n");
a0762859 789 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
31b1a7b4 790 qemu_log_flush();
93fcfe39 791 qemu_log_close();
924edcae 792 }
493ae1f0 793 va_end(ap2);
f9373291 794 va_end(ap);
fd052bf6
RV
795#if defined(CONFIG_USER_ONLY)
796 {
797 struct sigaction act;
798 sigfillset(&act.sa_mask);
799 act.sa_handler = SIG_DFL;
800 sigaction(SIGABRT, &act, NULL);
801 }
802#endif
7501267e
FB
803 abort();
804}
805
0124311e 806#if !defined(CONFIG_USER_ONLY)
0dc3f44a 807/* Called from RCU critical section */
041603fe
PB
808static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
809{
810 RAMBlock *block;
811
43771539 812 block = atomic_rcu_read(&ram_list.mru_block);
9b8424d5 813 if (block && addr - block->offset < block->max_length) {
041603fe
PB
814 goto found;
815 }
0dc3f44a 816 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5 817 if (addr - block->offset < block->max_length) {
041603fe
PB
818 goto found;
819 }
820 }
821
822 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
823 abort();
824
825found:
43771539
PB
826 /* It is safe to write mru_block outside the iothread lock. This
827 * is what happens:
828 *
829 * mru_block = xxx
830 * rcu_read_unlock()
831 * xxx removed from list
832 * rcu_read_lock()
833 * read mru_block
834 * mru_block = NULL;
835 * call_rcu(reclaim_ramblock, xxx);
836 * rcu_read_unlock()
837 *
838 * atomic_rcu_set is not needed here. The block was already published
839 * when it was placed into the list. Here we're just making an extra
840 * copy of the pointer.
841 */
041603fe
PB
842 ram_list.mru_block = block;
843 return block;
844}
845
a2f4d5be 846static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
d24981d3 847{
041603fe 848 ram_addr_t start1;
a2f4d5be
JQ
849 RAMBlock *block;
850 ram_addr_t end;
851
852 end = TARGET_PAGE_ALIGN(start + length);
853 start &= TARGET_PAGE_MASK;
d24981d3 854
0dc3f44a 855 rcu_read_lock();
041603fe
PB
856 block = qemu_get_ram_block(start);
857 assert(block == qemu_get_ram_block(end - 1));
1240be24 858 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
041603fe 859 cpu_tlb_reset_dirty_all(start1, length);
0dc3f44a 860 rcu_read_unlock();
d24981d3
JQ
861}
862
5579c7f3 863/* Note: start and end must be within the same ram block. */
a2f4d5be 864void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
52159192 865 unsigned client)
1ccde1cb 866{
1ccde1cb
FB
867 if (length == 0)
868 return;
c8d6f66a 869 cpu_physical_memory_clear_dirty_range_type(start, length, client);
f23db169 870
d24981d3 871 if (tcg_enabled()) {
a2f4d5be 872 tlb_reset_dirty_range_all(start, length);
5579c7f3 873 }
1ccde1cb
FB
874}
875
981fdf23 876static void cpu_physical_memory_set_dirty_tracking(bool enable)
74576198
AL
877{
878 in_migration = enable;
74576198
AL
879}
880
79e2b9ae 881/* Called from RCU critical section */
bb0e627a 882hwaddr memory_region_section_get_iotlb(CPUState *cpu,
149f54b5
PB
883 MemoryRegionSection *section,
884 target_ulong vaddr,
885 hwaddr paddr, hwaddr xlat,
886 int prot,
887 target_ulong *address)
e5548617 888{
a8170e5e 889 hwaddr iotlb;
e5548617
BS
890 CPUWatchpoint *wp;
891
cc5bea60 892 if (memory_region_is_ram(section->mr)) {
e5548617
BS
893 /* Normal RAM. */
894 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
149f54b5 895 + xlat;
e5548617 896 if (!section->readonly) {
b41aac4f 897 iotlb |= PHYS_SECTION_NOTDIRTY;
e5548617 898 } else {
b41aac4f 899 iotlb |= PHYS_SECTION_ROM;
e5548617
BS
900 }
901 } else {
1b3fb98f 902 iotlb = section - section->address_space->dispatch->map.sections;
149f54b5 903 iotlb += xlat;
e5548617
BS
904 }
905
906 /* Make accesses to pages with watchpoints go via the
907 watchpoint trap routines. */
ff4700b0 908 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d 909 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
e5548617
BS
910 /* Avoid trapping reads of pages with a write breakpoint. */
911 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
b41aac4f 912 iotlb = PHYS_SECTION_WATCH + paddr;
e5548617
BS
913 *address |= TLB_MMIO;
914 break;
915 }
916 }
917 }
918
919 return iotlb;
920}
9fa3e853
FB
921#endif /* defined(CONFIG_USER_ONLY) */
922
e2eef170 923#if !defined(CONFIG_USER_ONLY)
8da3ff18 924
c227f099 925static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
5312bd8b 926 uint16_t section);
acc9d80b 927static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
54688b1e 928
a2b257d6
IM
929static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
930 qemu_anon_ram_alloc;
91138037
MA
931
932/*
 933 * Set a custom physical guest memory allocator.
934 * Accelerators with unusual needs may need this. Hopefully, we can
935 * get rid of it eventually.
936 */
a2b257d6 937void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
91138037
MA
938{
939 phys_mem_alloc = alloc;
940}
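A minimal sketch of how an accelerator might use this hook; my_accel_ram_alloc is hypothetical, while phys_mem_set_alloc() and qemu_anon_ram_alloc() are the real entry points:

    static void *my_accel_ram_alloc(size_t size, uint64_t *align)
    {
        /* e.g. carve guest RAM out of a special pool; fall back here */
        return qemu_anon_ram_alloc(size, align);
    }

    /* called once during accelerator init, before any RAM block is created */
    phys_mem_set_alloc(my_accel_ram_alloc);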
941
53cb28cb
MA
942static uint16_t phys_section_add(PhysPageMap *map,
943 MemoryRegionSection *section)
5312bd8b 944{
68f3f65b
PB
945 /* The physical section number is ORed with a page-aligned
946 * pointer to produce the iotlb entries. Thus it should
947 * never overflow into the page-aligned value.
948 */
53cb28cb 949 assert(map->sections_nb < TARGET_PAGE_SIZE);
68f3f65b 950
53cb28cb
MA
951 if (map->sections_nb == map->sections_nb_alloc) {
952 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
953 map->sections = g_renew(MemoryRegionSection, map->sections,
954 map->sections_nb_alloc);
5312bd8b 955 }
53cb28cb 956 map->sections[map->sections_nb] = *section;
dfde4e6e 957 memory_region_ref(section->mr);
53cb28cb 958 return map->sections_nb++;
5312bd8b
AK
959}
960
058bc4b5
PB
961static void phys_section_destroy(MemoryRegion *mr)
962{
dfde4e6e
PB
963 memory_region_unref(mr);
964
058bc4b5
PB
965 if (mr->subpage) {
966 subpage_t *subpage = container_of(mr, subpage_t, iomem);
b4fefef9 967 object_unref(OBJECT(&subpage->iomem));
058bc4b5
PB
968 g_free(subpage);
969 }
970}
971
6092666e 972static void phys_sections_free(PhysPageMap *map)
5312bd8b 973{
9affd6fc
PB
974 while (map->sections_nb > 0) {
975 MemoryRegionSection *section = &map->sections[--map->sections_nb];
058bc4b5
PB
976 phys_section_destroy(section->mr);
977 }
9affd6fc
PB
978 g_free(map->sections);
979 g_free(map->nodes);
5312bd8b
AK
980}
981
ac1970fb 982static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
0f0cb164
AK
983{
984 subpage_t *subpage;
a8170e5e 985 hwaddr base = section->offset_within_address_space
0f0cb164 986 & TARGET_PAGE_MASK;
97115a8d 987 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
53cb28cb 988 d->map.nodes, d->map.sections);
0f0cb164
AK
989 MemoryRegionSection subsection = {
990 .offset_within_address_space = base,
052e87b0 991 .size = int128_make64(TARGET_PAGE_SIZE),
0f0cb164 992 };
a8170e5e 993 hwaddr start, end;
0f0cb164 994
f3705d53 995 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
0f0cb164 996
f3705d53 997 if (!(existing->mr->subpage)) {
acc9d80b 998 subpage = subpage_init(d->as, base);
3be91e86 999 subsection.address_space = d->as;
0f0cb164 1000 subsection.mr = &subpage->iomem;
ac1970fb 1001 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
53cb28cb 1002 phys_section_add(&d->map, &subsection));
0f0cb164 1003 } else {
f3705d53 1004 subpage = container_of(existing->mr, subpage_t, iomem);
0f0cb164
AK
1005 }
1006 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
052e87b0 1007 end = start + int128_get64(section->size) - 1;
53cb28cb
MA
1008 subpage_register(subpage, start, end,
1009 phys_section_add(&d->map, section));
0f0cb164
AK
1010}
1011
1012
052e87b0
PB
1013static void register_multipage(AddressSpaceDispatch *d,
1014 MemoryRegionSection *section)
33417e70 1015{
a8170e5e 1016 hwaddr start_addr = section->offset_within_address_space;
53cb28cb 1017 uint16_t section_index = phys_section_add(&d->map, section);
052e87b0
PB
1018 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1019 TARGET_PAGE_BITS));
dd81124b 1020
733d5ef5
PB
1021 assert(num_pages);
1022 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
33417e70
FB
1023}
1024
ac1970fb 1025static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
0f0cb164 1026{
89ae337a 1027 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
00752703 1028 AddressSpaceDispatch *d = as->next_dispatch;
99b9cc06 1029 MemoryRegionSection now = *section, remain = *section;
052e87b0 1030 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
0f0cb164 1031
733d5ef5
PB
1032 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1033 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1034 - now.offset_within_address_space;
1035
052e87b0 1036 now.size = int128_min(int128_make64(left), now.size);
ac1970fb 1037 register_subpage(d, &now);
733d5ef5 1038 } else {
052e87b0 1039 now.size = int128_zero();
733d5ef5 1040 }
052e87b0
PB
1041 while (int128_ne(remain.size, now.size)) {
1042 remain.size = int128_sub(remain.size, now.size);
1043 remain.offset_within_address_space += int128_get64(now.size);
1044 remain.offset_within_region += int128_get64(now.size);
69b67646 1045 now = remain;
052e87b0 1046 if (int128_lt(remain.size, page_size)) {
733d5ef5 1047 register_subpage(d, &now);
88266249 1048 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
052e87b0 1049 now.size = page_size;
ac1970fb 1050 register_subpage(d, &now);
69b67646 1051 } else {
052e87b0 1052 now.size = int128_and(now.size, int128_neg(page_size));
ac1970fb 1053 register_multipage(d, &now);
69b67646 1054 }
0f0cb164
AK
1055 }
1056}
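A worked example of the splitting loop above (4 KiB target pages assumed; the offsets are illustrative):

    /*
     * A section with offset_within_address_space = 0x1800 and size 0x3000 is
     * registered in three pieces:
     *   [0x1800, 0x2000)  unaligned head            -> register_subpage()
     *   [0x2000, 0x4000)  whole pages               -> register_multipage()
     *   [0x4000, 0x4800)  tail smaller than a page  -> register_subpage()
     */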
1057
62a2744c
SY
1058void qemu_flush_coalesced_mmio_buffer(void)
1059{
1060 if (kvm_enabled())
1061 kvm_flush_coalesced_mmio_buffer();
1062}
1063
b2a8658e
UD
1064void qemu_mutex_lock_ramlist(void)
1065{
1066 qemu_mutex_lock(&ram_list.mutex);
1067}
1068
1069void qemu_mutex_unlock_ramlist(void)
1070{
1071 qemu_mutex_unlock(&ram_list.mutex);
1072}
1073
e1e84ba0 1074#ifdef __linux__
c902760f
MT
1075
1076#include <sys/vfs.h>
1077
1078#define HUGETLBFS_MAGIC 0x958458f6
1079
fc7a5800 1080static long gethugepagesize(const char *path, Error **errp)
c902760f
MT
1081{
1082 struct statfs fs;
1083 int ret;
1084
1085 do {
9742bf26 1086 ret = statfs(path, &fs);
c902760f
MT
1087 } while (ret != 0 && errno == EINTR);
1088
1089 if (ret != 0) {
fc7a5800
HT
1090 error_setg_errno(errp, errno, "failed to get page size of file %s",
1091 path);
9742bf26 1092 return 0;
c902760f
MT
1093 }
1094
1095 if (fs.f_type != HUGETLBFS_MAGIC)
9742bf26 1096 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
c902760f
MT
1097
1098 return fs.f_bsize;
1099}
1100
04b16653
AW
1101static void *file_ram_alloc(RAMBlock *block,
1102 ram_addr_t memory,
7f56e740
PB
1103 const char *path,
1104 Error **errp)
c902760f
MT
1105{
1106 char *filename;
8ca761f6
PF
1107 char *sanitized_name;
1108 char *c;
557529dd 1109 void *area = NULL;
c902760f 1110 int fd;
557529dd 1111 uint64_t hpagesize;
fc7a5800 1112 Error *local_err = NULL;
c902760f 1113
fc7a5800
HT
1114 hpagesize = gethugepagesize(path, &local_err);
1115 if (local_err) {
1116 error_propagate(errp, local_err);
f9a49dfa 1117 goto error;
c902760f 1118 }
a2b257d6 1119 block->mr->align = hpagesize;
c902760f
MT
1120
1121 if (memory < hpagesize) {
557529dd
HT
1122 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1123 "or larger than huge page size 0x%" PRIx64,
1124 memory, hpagesize);
1125 goto error;
c902760f
MT
1126 }
1127
1128 if (kvm_enabled() && !kvm_has_sync_mmu()) {
7f56e740
PB
1129 error_setg(errp,
1130 "host lacks kvm mmu notifiers, -mem-path unsupported");
f9a49dfa 1131 goto error;
c902760f
MT
1132 }
1133
8ca761f6 1134 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
83234bf2 1135 sanitized_name = g_strdup(memory_region_name(block->mr));
8ca761f6
PF
1136 for (c = sanitized_name; *c != '\0'; c++) {
1137 if (*c == '/')
1138 *c = '_';
1139 }
1140
1141 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1142 sanitized_name);
1143 g_free(sanitized_name);
c902760f
MT
1144
1145 fd = mkstemp(filename);
1146 if (fd < 0) {
7f56e740
PB
1147 error_setg_errno(errp, errno,
1148 "unable to create backing store for hugepages");
e4ada482 1149 g_free(filename);
f9a49dfa 1150 goto error;
c902760f
MT
1151 }
1152 unlink(filename);
e4ada482 1153 g_free(filename);
c902760f
MT
1154
1155 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1156
1157 /*
1158 * ftruncate is not supported by hugetlbfs in older
1159 * hosts, so don't bother bailing out on errors.
1160 * If anything goes wrong with it under other filesystems,
1161 * mmap will fail.
1162 */
7f56e740 1163 if (ftruncate(fd, memory)) {
9742bf26 1164 perror("ftruncate");
7f56e740 1165 }
c902760f 1166
dbcb8981
PB
1167 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1168 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1169 fd, 0);
c902760f 1170 if (area == MAP_FAILED) {
7f56e740
PB
1171 error_setg_errno(errp, errno,
1172 "unable to map backing store for hugepages");
9742bf26 1173 close(fd);
f9a49dfa 1174 goto error;
c902760f 1175 }
ef36fa14
MT
1176
1177 if (mem_prealloc) {
38183310 1178 os_mem_prealloc(fd, area, memory);
ef36fa14
MT
1179 }
1180
04b16653 1181 block->fd = fd;
c902760f 1182 return area;
f9a49dfa
MT
1183
1184error:
1185 if (mem_prealloc) {
81b07353 1186 error_report("%s", error_get_pretty(*errp));
f9a49dfa
MT
1187 exit(1);
1188 }
1189 return NULL;
c902760f
MT
1190}
1191#endif
1192
0dc3f44a 1193/* Called with the ramlist lock held. */
d17b5288 1194static ram_addr_t find_ram_offset(ram_addr_t size)
04b16653
AW
1195{
1196 RAMBlock *block, *next_block;
3e837b2c 1197 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
04b16653 1198
49cd9ac6
SH
1199 assert(size != 0); /* it would hand out same offset multiple times */
1200
0dc3f44a 1201 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
04b16653 1202 return 0;
0d53d9fe 1203 }
04b16653 1204
0dc3f44a 1205 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
f15fbc4b 1206 ram_addr_t end, next = RAM_ADDR_MAX;
04b16653 1207
62be4e3a 1208 end = block->offset + block->max_length;
04b16653 1209
0dc3f44a 1210 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
04b16653
AW
1211 if (next_block->offset >= end) {
1212 next = MIN(next, next_block->offset);
1213 }
1214 }
1215 if (next - end >= size && next - end < mingap) {
3e837b2c 1216 offset = end;
04b16653
AW
1217 mingap = next - end;
1218 }
1219 }
3e837b2c
AW
1220
1221 if (offset == RAM_ADDR_MAX) {
1222 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1223 (uint64_t)size);
1224 abort();
1225 }
1226
04b16653
AW
1227 return offset;
1228}
1229
652d7ec2 1230ram_addr_t last_ram_offset(void)
d17b5288
AW
1231{
1232 RAMBlock *block;
1233 ram_addr_t last = 0;
1234
0dc3f44a
MD
1235 rcu_read_lock();
1236 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
62be4e3a 1237 last = MAX(last, block->offset + block->max_length);
0d53d9fe 1238 }
0dc3f44a 1239 rcu_read_unlock();
d17b5288
AW
1240 return last;
1241}
1242
ddb97f1d
JB
1243static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1244{
1245 int ret;
ddb97f1d
JB
1246
1247 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
47c8ca53 1248 if (!machine_dump_guest_core(current_machine)) {
ddb97f1d
JB
1249 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1250 if (ret) {
1251 perror("qemu_madvise");
1252 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1253 "but dump_guest_core=off specified\n");
1254 }
1255 }
1256}
1257
0dc3f44a
MD
1258/* Called within an RCU critical section, or while the ramlist lock
1259 * is held.
1260 */
20cfe881 1261static RAMBlock *find_ram_block(ram_addr_t addr)
84b89d78 1262{
20cfe881 1263 RAMBlock *block;
84b89d78 1264
0dc3f44a 1265 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
c5705a77 1266 if (block->offset == addr) {
20cfe881 1267 return block;
c5705a77
AK
1268 }
1269 }
20cfe881
HT
1270
1271 return NULL;
1272}
1273
ae3a7047 1274/* Called with iothread lock held. */
20cfe881
HT
1275void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1276{
ae3a7047 1277 RAMBlock *new_block, *block;
20cfe881 1278
0dc3f44a 1279 rcu_read_lock();
ae3a7047 1280 new_block = find_ram_block(addr);
c5705a77
AK
1281 assert(new_block);
1282 assert(!new_block->idstr[0]);
84b89d78 1283
09e5ab63
AL
1284 if (dev) {
1285 char *id = qdev_get_dev_path(dev);
84b89d78
CM
1286 if (id) {
1287 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
7267c094 1288 g_free(id);
84b89d78
CM
1289 }
1290 }
1291 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1292
0dc3f44a 1293 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
c5705a77 1294 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
84b89d78
CM
1295 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1296 new_block->idstr);
1297 abort();
1298 }
1299 }
0dc3f44a 1300 rcu_read_unlock();
c5705a77
AK
1301}
1302
ae3a7047 1303/* Called with iothread lock held. */
20cfe881
HT
1304void qemu_ram_unset_idstr(ram_addr_t addr)
1305{
ae3a7047 1306 RAMBlock *block;
20cfe881 1307
ae3a7047
MD
1308 /* FIXME: arch_init.c assumes that this is not called throughout
1309 * migration. Ignore the problem since hot-unplug during migration
1310 * does not work anyway.
1311 */
1312
0dc3f44a 1313 rcu_read_lock();
ae3a7047 1314 block = find_ram_block(addr);
20cfe881
HT
1315 if (block) {
1316 memset(block->idstr, 0, sizeof(block->idstr));
1317 }
0dc3f44a 1318 rcu_read_unlock();
20cfe881
HT
1319}
1320
8490fc78
LC
1321static int memory_try_enable_merging(void *addr, size_t len)
1322{
75cc7f01 1323 if (!machine_mem_merge(current_machine)) {
8490fc78
LC
1324 /* disabled by the user */
1325 return 0;
1326 }
1327
1328 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1329}
1330
62be4e3a MT 1331/* Only legal before the guest might have detected the memory size: e.g. on
 1332 * incoming migration, or right after reset.
 1333 *
 1334 * As the memory core doesn't know how memory is accessed, it is up to the
 1335 * resize callback to update device state and/or add assertions to detect
1336 * misuse, if necessary.
1337 */
1338int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1339{
1340 RAMBlock *block = find_ram_block(base);
1341
1342 assert(block);
1343
129ddaf3
MT
1344 newsize = TARGET_PAGE_ALIGN(newsize);
1345
62be4e3a
MT
1346 if (block->used_length == newsize) {
1347 return 0;
1348 }
1349
1350 if (!(block->flags & RAM_RESIZEABLE)) {
1351 error_setg_errno(errp, EINVAL,
1352 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1353 " in != 0x" RAM_ADDR_FMT, block->idstr,
1354 newsize, block->used_length);
1355 return -EINVAL;
1356 }
1357
1358 if (block->max_length < newsize) {
1359 error_setg_errno(errp, EINVAL,
1360 "Length too large: %s: 0x" RAM_ADDR_FMT
1361 " > 0x" RAM_ADDR_FMT, block->idstr,
1362 newsize, block->max_length);
1363 return -EINVAL;
1364 }
1365
1366 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1367 block->used_length = newsize;
1368 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1369 memory_region_set_size(block->mr, newsize);
1370 if (block->resized) {
1371 block->resized(block->idstr, newsize, block->host);
1372 }
1373 return 0;
1374}
1375
ef701d7b 1376static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
c5705a77 1377{
e1c57ab8 1378 RAMBlock *block;
0d53d9fe 1379 RAMBlock *last_block = NULL;
2152f5ca
JQ
1380 ram_addr_t old_ram_size, new_ram_size;
1381
1382 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
c5705a77 1383
b2a8658e 1384 qemu_mutex_lock_ramlist();
9b8424d5 1385 new_block->offset = find_ram_offset(new_block->max_length);
e1c57ab8
PB
1386
1387 if (!new_block->host) {
1388 if (xen_enabled()) {
9b8424d5
MT
1389 xen_ram_alloc(new_block->offset, new_block->max_length,
1390 new_block->mr);
e1c57ab8 1391 } else {
9b8424d5 1392 new_block->host = phys_mem_alloc(new_block->max_length,
a2b257d6 1393 &new_block->mr->align);
39228250 1394 if (!new_block->host) {
ef701d7b
HT
1395 error_setg_errno(errp, errno,
1396 "cannot set up guest memory '%s'",
1397 memory_region_name(new_block->mr));
1398 qemu_mutex_unlock_ramlist();
1399 return -1;
39228250 1400 }
9b8424d5 1401 memory_try_enable_merging(new_block->host, new_block->max_length);
6977dfe6 1402 }
c902760f 1403 }
94a6b54f 1404
0d53d9fe
MD
1405 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1406 * QLIST (which has an RCU-friendly variant) does not have insertion at
1407 * tail, so save the last element in last_block.
1408 */
0dc3f44a 1409 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
0d53d9fe 1410 last_block = block;
9b8424d5 1411 if (block->max_length < new_block->max_length) {
abb26d63
PB
1412 break;
1413 }
1414 }
1415 if (block) {
0dc3f44a 1416 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
0d53d9fe 1417 } else if (last_block) {
0dc3f44a 1418 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
0d53d9fe 1419 } else { /* list is empty */
0dc3f44a 1420 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
abb26d63 1421 }
0d6d3c87 1422 ram_list.mru_block = NULL;
94a6b54f 1423
0dc3f44a
MD
1424 /* Write list before version */
1425 smp_wmb();
f798b07f 1426 ram_list.version++;
b2a8658e 1427 qemu_mutex_unlock_ramlist();
f798b07f 1428
2152f5ca
JQ
1429 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1430
1431 if (new_ram_size > old_ram_size) {
1ab4c8ce 1432 int i;
ae3a7047
MD
1433
1434 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1ab4c8ce
JQ
1435 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1436 ram_list.dirty_memory[i] =
1437 bitmap_zero_extend(ram_list.dirty_memory[i],
1438 old_ram_size, new_ram_size);
1439 }
2152f5ca 1440 }
9b8424d5
MT
1441 cpu_physical_memory_set_dirty_range(new_block->offset,
1442 new_block->used_length);
94a6b54f 1443
a904c911
PB
1444 if (new_block->host) {
1445 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1446 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1447 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1448 if (kvm_enabled()) {
1449 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1450 }
e1c57ab8 1451 }
6f0437e8 1452
94a6b54f
PB
1453 return new_block->offset;
1454}
e9a1ab19 1455
0b183fc8 1456#ifdef __linux__
e1c57ab8 1457ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
dbcb8981 1458 bool share, const char *mem_path,
7f56e740 1459 Error **errp)
e1c57ab8
PB
1460{
1461 RAMBlock *new_block;
ef701d7b
HT
1462 ram_addr_t addr;
1463 Error *local_err = NULL;
e1c57ab8
PB
1464
1465 if (xen_enabled()) {
7f56e740
PB
1466 error_setg(errp, "-mem-path not supported with Xen");
1467 return -1;
e1c57ab8
PB
1468 }
1469
1470 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1471 /*
1472 * file_ram_alloc() needs to allocate just like
1473 * phys_mem_alloc, but we haven't bothered to provide
1474 * a hook there.
1475 */
7f56e740
PB
1476 error_setg(errp,
1477 "-mem-path not supported with this accelerator");
1478 return -1;
e1c57ab8
PB
1479 }
1480
1481 size = TARGET_PAGE_ALIGN(size);
1482 new_block = g_malloc0(sizeof(*new_block));
1483 new_block->mr = mr;
9b8424d5
MT
1484 new_block->used_length = size;
1485 new_block->max_length = size;
dbcb8981 1486 new_block->flags = share ? RAM_SHARED : 0;
7f56e740
PB
1487 new_block->host = file_ram_alloc(new_block, size,
1488 mem_path, errp);
1489 if (!new_block->host) {
1490 g_free(new_block);
1491 return -1;
1492 }
1493
ef701d7b
HT
1494 addr = ram_block_add(new_block, &local_err);
1495 if (local_err) {
1496 g_free(new_block);
1497 error_propagate(errp, local_err);
1498 return -1;
1499 }
1500 return addr;
e1c57ab8 1501}
0b183fc8 1502#endif
e1c57ab8 1503
62be4e3a
MT
1504static
1505ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1506 void (*resized)(const char*,
1507 uint64_t length,
1508 void *host),
1509 void *host, bool resizeable,
ef701d7b 1510 MemoryRegion *mr, Error **errp)
e1c57ab8
PB
1511{
1512 RAMBlock *new_block;
ef701d7b
HT
1513 ram_addr_t addr;
1514 Error *local_err = NULL;
e1c57ab8
PB
1515
1516 size = TARGET_PAGE_ALIGN(size);
62be4e3a 1517 max_size = TARGET_PAGE_ALIGN(max_size);
e1c57ab8
PB
1518 new_block = g_malloc0(sizeof(*new_block));
1519 new_block->mr = mr;
62be4e3a 1520 new_block->resized = resized;
9b8424d5
MT
1521 new_block->used_length = size;
1522 new_block->max_length = max_size;
62be4e3a 1523 assert(max_size >= size);
e1c57ab8
PB
1524 new_block->fd = -1;
1525 new_block->host = host;
1526 if (host) {
7bd4f430 1527 new_block->flags |= RAM_PREALLOC;
e1c57ab8 1528 }
62be4e3a
MT
1529 if (resizeable) {
1530 new_block->flags |= RAM_RESIZEABLE;
1531 }
ef701d7b
HT
1532 addr = ram_block_add(new_block, &local_err);
1533 if (local_err) {
1534 g_free(new_block);
1535 error_propagate(errp, local_err);
1536 return -1;
1537 }
1538 return addr;
e1c57ab8
PB
1539}
1540
62be4e3a
MT
1541ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1542 MemoryRegion *mr, Error **errp)
1543{
1544 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1545}
1546
ef701d7b 1547ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
6977dfe6 1548{
62be4e3a
MT
1549 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1550}
1551
1552ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1553 void (*resized)(const char*,
1554 uint64_t length,
1555 void *host),
1556 MemoryRegion *mr, Error **errp)
1557{
1558 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
6977dfe6
YT
1559}
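A usage sketch tying qemu_ram_alloc_resizeable() and qemu_ram_resize() together; the callback, the sizes and 'mr' (the owning MemoryRegion) are placeholders:

    static void my_resized_cb(const char *id, uint64_t new_length, void *host)
    {
        /* hypothetical: let the owning device react to the new used_length */
    }

    Error *err = NULL;
    ram_addr_t base = qemu_ram_alloc_resizeable(16 * 1024 * 1024,  /* initial */
                                                64 * 1024 * 1024,  /* maximum */
                                                my_resized_cb, mr, &err);

    /* later, e.g. on incoming migration, grow within the preallocated maximum */
    if (qemu_ram_resize(base, 32 * 1024 * 1024, &err) < 0) {
        error_report("%s", error_get_pretty(err));
        error_free(err);
    }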
1560
1f2e98b6
AW
1561void qemu_ram_free_from_ptr(ram_addr_t addr)
1562{
1563 RAMBlock *block;
1564
b2a8658e 1565 qemu_mutex_lock_ramlist();
0dc3f44a 1566 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1f2e98b6 1567 if (addr == block->offset) {
0dc3f44a 1568 QLIST_REMOVE_RCU(block, next);
0d6d3c87 1569 ram_list.mru_block = NULL;
0dc3f44a
MD
1570 /* Write list before version */
1571 smp_wmb();
f798b07f 1572 ram_list.version++;
43771539 1573 g_free_rcu(block, rcu);
b2a8658e 1574 break;
1f2e98b6
AW
1575 }
1576 }
b2a8658e 1577 qemu_mutex_unlock_ramlist();
1f2e98b6
AW
1578}
1579
43771539
PB
1580static void reclaim_ramblock(RAMBlock *block)
1581{
1582 if (block->flags & RAM_PREALLOC) {
1583 ;
1584 } else if (xen_enabled()) {
1585 xen_invalidate_map_cache_entry(block->host);
1586#ifndef _WIN32
1587 } else if (block->fd >= 0) {
1588 munmap(block->host, block->max_length);
1589 close(block->fd);
1590#endif
1591 } else {
1592 qemu_anon_ram_free(block->host, block->max_length);
1593 }
1594 g_free(block);
1595}
1596
c227f099 1597void qemu_ram_free(ram_addr_t addr)
e9a1ab19 1598{
04b16653
AW
1599 RAMBlock *block;
1600
b2a8658e 1601 qemu_mutex_lock_ramlist();
0dc3f44a 1602 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
04b16653 1603 if (addr == block->offset) {
0dc3f44a 1604 QLIST_REMOVE_RCU(block, next);
0d6d3c87 1605 ram_list.mru_block = NULL;
0dc3f44a
MD
1606 /* Write list before version */
1607 smp_wmb();
f798b07f 1608 ram_list.version++;
43771539 1609 call_rcu(block, reclaim_ramblock, rcu);
b2a8658e 1610 break;
04b16653
AW
1611 }
1612 }
b2a8658e 1613 qemu_mutex_unlock_ramlist();
e9a1ab19
FB
1614}
1615
cd19cfa2
HY
1616#ifndef _WIN32
1617void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1618{
1619 RAMBlock *block;
1620 ram_addr_t offset;
1621 int flags;
1622 void *area, *vaddr;
1623
0dc3f44a 1624 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
cd19cfa2 1625 offset = addr - block->offset;
9b8424d5 1626 if (offset < block->max_length) {
1240be24 1627 vaddr = ramblock_ptr(block, offset);
7bd4f430 1628 if (block->flags & RAM_PREALLOC) {
cd19cfa2 1629 ;
dfeaf2ab
MA
1630 } else if (xen_enabled()) {
1631 abort();
cd19cfa2
HY
1632 } else {
1633 flags = MAP_FIXED;
3435f395 1634 if (block->fd >= 0) {
dbcb8981
PB
1635 flags |= (block->flags & RAM_SHARED ?
1636 MAP_SHARED : MAP_PRIVATE);
3435f395
MA
1637 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1638 flags, block->fd, offset);
cd19cfa2 1639 } else {
2eb9fbaa
MA
1640 /*
1641 * Remap needs to match alloc. Accelerators that
1642 * set phys_mem_alloc never remap. If they did,
1643 * we'd need a remap hook here.
1644 */
1645 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1646
cd19cfa2
HY
1647 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1648 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1649 flags, -1, 0);
cd19cfa2
HY
1650 }
1651 if (area != vaddr) {
f15fbc4b
AP
1652 fprintf(stderr, "Could not remap addr: "
1653 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
cd19cfa2
HY
1654 length, addr);
1655 exit(1);
1656 }
8490fc78 1657 memory_try_enable_merging(vaddr, length);
ddb97f1d 1658 qemu_ram_setup_dump(vaddr, length);
cd19cfa2 1659 }
cd19cfa2
HY
1660 }
1661 }
1662}
1663#endif /* !_WIN32 */
1664
a35ba7be
PB
1665int qemu_get_ram_fd(ram_addr_t addr)
1666{
ae3a7047
MD
1667 RAMBlock *block;
1668 int fd;
a35ba7be 1669
0dc3f44a 1670 rcu_read_lock();
ae3a7047
MD
1671 block = qemu_get_ram_block(addr);
1672 fd = block->fd;
0dc3f44a 1673 rcu_read_unlock();
ae3a7047 1674 return fd;
a35ba7be
PB
1675}
1676
3fd74b84
DM
1677void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1678{
ae3a7047
MD
1679 RAMBlock *block;
1680 void *ptr;
3fd74b84 1681
0dc3f44a 1682 rcu_read_lock();
ae3a7047
MD
1683 block = qemu_get_ram_block(addr);
1684 ptr = ramblock_ptr(block, 0);
0dc3f44a 1685 rcu_read_unlock();
ae3a7047 1686 return ptr;
3fd74b84
DM
1687}
1688
1b5ec234 1689/* Return a host pointer to ram allocated with qemu_ram_alloc.
ae3a7047
MD
1690 * This should not be used for general purpose DMA. Use address_space_map
1691 * or address_space_rw instead. For local memory (e.g. video ram) that the
1692 * device owns, use memory_region_get_ram_ptr.
0dc3f44a
MD
1693 *
1694 * By the time this function returns, the returned pointer is not protected
1695 * by RCU anymore. If the caller is not within an RCU critical section and
1696 * does not hold the iothread lock, it must have other means of protecting the
1697 * pointer, such as a reference to the region that includes the incoming
1698 * ram_addr_t.
1b5ec234
PB
1699 */
1700void *qemu_get_ram_ptr(ram_addr_t addr)
1701{
ae3a7047
MD
1702 RAMBlock *block;
1703 void *ptr;
1b5ec234 1704
0dc3f44a 1705 rcu_read_lock();
ae3a7047
MD
1706 block = qemu_get_ram_block(addr);
1707
1708 if (xen_enabled() && block->host == NULL) {
0d6d3c87
PB
1709 /* We need to check if the requested address is in the RAM
1710 * because we don't want to map the entire memory in QEMU.
1711 * In that case just map until the end of the page.
1712 */
1713 if (block->offset == 0) {
ae3a7047 1714 ptr = xen_map_cache(addr, 0, 0);
0dc3f44a 1715 goto unlock;
0d6d3c87 1716 }
ae3a7047
MD
1717
1718 block->host = xen_map_cache(block->offset, block->max_length, 1);
0d6d3c87 1719 }
ae3a7047
MD
1720 ptr = ramblock_ptr(block, addr - block->offset);
1721
0dc3f44a
MD
1722unlock:
1723 rcu_read_unlock();
ae3a7047 1724 return ptr;
dc828ca1
PB
1725}
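A sketch of the caller-side protection the comment above asks for; both variants are illustrative, with 'mr', 'ram_addr' and 'offset' as placeholders:

    /* Variant 1: stay inside an RCU critical section while using the pointer */
    rcu_read_lock();
    void *host = qemu_get_ram_ptr(ram_addr);
    /* ... access host ... */
    rcu_read_unlock();

    /* Variant 2: pin the owning MemoryRegion across the access instead */
    memory_region_ref(mr);
    void *host2 = qemu_get_ram_ptr(memory_region_get_ram_addr(mr) + offset);
    /* ... access host2 ... */
    memory_region_unref(mr);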
1726
38bee5dc 1727/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
ae3a7047 1728 * but takes a size argument.
0dc3f44a
MD
1729 *
1730 * By the time this function returns, the returned pointer is not protected
1731 * by RCU anymore. If the caller is not within an RCU critical section and
1732 * does not hold the iothread lock, it must have other means of protecting the
1733 * pointer, such as a reference to the region that includes the incoming
1734 * ram_addr_t.
ae3a7047 1735 */
cb85f7ab 1736static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
38bee5dc 1737{
ae3a7047 1738 void *ptr;
8ab934f9
SS
1739 if (*size == 0) {
1740 return NULL;
1741 }
868bb33f 1742 if (xen_enabled()) {
e41d7c69 1743 return xen_map_cache(addr, *size, 1);
868bb33f 1744 } else {
38bee5dc 1745 RAMBlock *block;
0dc3f44a
MD
1746 rcu_read_lock();
1747 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5
MT
1748 if (addr - block->offset < block->max_length) {
1749 if (addr - block->offset + *size > block->max_length)
1750 *size = block->max_length - addr + block->offset;
ae3a7047 1751 ptr = ramblock_ptr(block, addr - block->offset);
0dc3f44a 1752 rcu_read_unlock();
ae3a7047 1753 return ptr;
38bee5dc
SS
1754 }
1755 }
1756
1757 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1758 abort();
38bee5dc
SS
1759 }
1760}
1761
7443b437 1762/* Some of the softmmu routines need to translate from a host pointer
ae3a7047
MD
1763 * (typically a TLB entry) back to a ram offset.
1764 *
1765 * By the time this function returns, the returned pointer is not protected
1766 * by RCU anymore. If the caller is not within an RCU critical section and
1767 * does not hold the iothread lock, it must have other means of protecting the
1768 * pointer, such as a reference to the region that includes the incoming
1769 * ram_addr_t.
1770 */
1b5ec234 1771MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
5579c7f3 1772{
94a6b54f
PB
1773 RAMBlock *block;
1774 uint8_t *host = ptr;
ae3a7047 1775 MemoryRegion *mr;
94a6b54f 1776
868bb33f 1777 if (xen_enabled()) {
0dc3f44a 1778 rcu_read_lock();
e41d7c69 1779 *ram_addr = xen_ram_addr_from_mapcache(ptr);
ae3a7047 1780 mr = qemu_get_ram_block(*ram_addr)->mr;
0dc3f44a 1781 rcu_read_unlock();
ae3a7047 1782 return mr;
712c2b41
SS
1783 }
1784
0dc3f44a
MD
1785 rcu_read_lock();
1786 block = atomic_rcu_read(&ram_list.mru_block);
9b8424d5 1787 if (block && block->host && host - block->host < block->max_length) {
23887b79
PB
1788 goto found;
1789 }
1790
0dc3f44a 1791 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
432d268c
JN
 1792        /* This can happen when the block is not mapped. */
1793 if (block->host == NULL) {
1794 continue;
1795 }
9b8424d5 1796 if (host - block->host < block->max_length) {
23887b79 1797 goto found;
f471a17e 1798 }
94a6b54f 1799 }
432d268c 1800
0dc3f44a 1801 rcu_read_unlock();
1b5ec234 1802 return NULL;
23887b79
PB
1803
1804found:
1805 *ram_addr = block->offset + (host - block->host);
ae3a7047 1806 mr = block->mr;
0dc3f44a 1807 rcu_read_unlock();
ae3a7047 1808 return mr;
e890261f 1809}
f471a17e 1810
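/*
 * Illustrative sketch (the helper name is an assumption, not from this
 * file): convert a host pointer that is believed to point into guest RAM
 * back into its ram_addr_t, failing cleanly for pointers that are not
 * backed by a RAMBlock.
 */
static bool example_host_ptr_to_ram_addr(void *host, ram_addr_t *out)
{
    ram_addr_t ram_addr;

    if (qemu_ram_addr_from_host(host, &ram_addr) == NULL) {
        return false;   /* not backed by guest RAM */
    }
    *out = ram_addr;
    return true;
}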
a8170e5e 1811static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
0e0df1e2 1812 uint64_t val, unsigned size)
9fa3e853 1813{
52159192 1814 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
0e0df1e2 1815 tb_invalidate_phys_page_fast(ram_addr, size);
3a7d929e 1816 }
0e0df1e2
AK
1817 switch (size) {
1818 case 1:
1819 stb_p(qemu_get_ram_ptr(ram_addr), val);
1820 break;
1821 case 2:
1822 stw_p(qemu_get_ram_ptr(ram_addr), val);
1823 break;
1824 case 4:
1825 stl_p(qemu_get_ram_ptr(ram_addr), val);
1826 break;
1827 default:
1828 abort();
3a7d929e 1829 }
6886867e 1830 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
f23db169
FB
1831 /* we remove the notdirty callback only if the code has been
1832 flushed */
a2cd8c85 1833 if (!cpu_physical_memory_is_clean(ram_addr)) {
4917cf44 1834 CPUArchState *env = current_cpu->env_ptr;
93afeade 1835 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
4917cf44 1836 }
9fa3e853
FB
1837}
1838
b018ddf6
PB
1839static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1840 unsigned size, bool is_write)
1841{
1842 return is_write;
1843}
1844
0e0df1e2 1845static const MemoryRegionOps notdirty_mem_ops = {
0e0df1e2 1846 .write = notdirty_mem_write,
b018ddf6 1847 .valid.accepts = notdirty_mem_accepts,
0e0df1e2 1848 .endianness = DEVICE_NATIVE_ENDIAN,
1ccde1cb
FB
1849};
1850
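/*
 * Commentary (summarising the handlers above, not in the original source):
 * pages that may still contain translated code are routed through
 * io_mem_notdirty.  The first write invalidates any TBs built from that
 * page, performs the store, marks the range dirty, and only once the page
 * is no longer "clean" is the TLB entry switched back to the direct RAM
 * path via tlb_set_dirty().
 */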
0f459d16 1851/* Generate a debug exception if a watchpoint has been hit. */
05068c0d 1852static void check_watchpoint(int offset, int len, int flags)
0f459d16 1853{
93afeade
AF
1854 CPUState *cpu = current_cpu;
1855 CPUArchState *env = cpu->env_ptr;
06d55cc1 1856 target_ulong pc, cs_base;
0f459d16 1857 target_ulong vaddr;
a1d1bb31 1858 CPUWatchpoint *wp;
06d55cc1 1859 int cpu_flags;
0f459d16 1860
ff4700b0 1861 if (cpu->watchpoint_hit) {
06d55cc1
AL
1862 /* We re-entered the check after replacing the TB. Now raise
 1863         * the debug interrupt so that it will trigger after the
1864 * current instruction. */
93afeade 1865 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
06d55cc1
AL
1866 return;
1867 }
93afeade 1868 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
ff4700b0 1869 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d
PM
1870 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1871 && (wp->flags & flags)) {
08225676
PM
1872 if (flags == BP_MEM_READ) {
1873 wp->flags |= BP_WATCHPOINT_HIT_READ;
1874 } else {
1875 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1876 }
1877 wp->hitaddr = vaddr;
ff4700b0
AF
1878 if (!cpu->watchpoint_hit) {
1879 cpu->watchpoint_hit = wp;
239c51a5 1880 tb_check_watchpoint(cpu);
6e140f28 1881 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
27103424 1882 cpu->exception_index = EXCP_DEBUG;
5638d180 1883 cpu_loop_exit(cpu);
6e140f28
AL
1884 } else {
1885 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
648f034c 1886 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
0ea8cb88 1887 cpu_resume_from_signal(cpu, NULL);
6e140f28 1888 }
06d55cc1 1889 }
6e140f28
AL
1890 } else {
1891 wp->flags &= ~BP_WATCHPOINT_HIT;
0f459d16
PB
1892 }
1893 }
1894}
1895
6658ffb8
PB
1896/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1897 so these check for a hit then pass through to the normal out-of-line
1898 phys routines. */
a8170e5e 1899static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1ec9b909 1900 unsigned size)
6658ffb8 1901{
05068c0d 1902 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1ec9b909 1903 switch (size) {
2c17449b 1904 case 1: return ldub_phys(&address_space_memory, addr);
41701aa4 1905 case 2: return lduw_phys(&address_space_memory, addr);
fdfba1a2 1906 case 4: return ldl_phys(&address_space_memory, addr);
1ec9b909
AK
1907 default: abort();
1908 }
6658ffb8
PB
1909}
1910
a8170e5e 1911static void watch_mem_write(void *opaque, hwaddr addr,
1ec9b909 1912 uint64_t val, unsigned size)
6658ffb8 1913{
05068c0d 1914 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1ec9b909 1915 switch (size) {
67364150 1916 case 1:
db3be60d 1917 stb_phys(&address_space_memory, addr, val);
67364150
MF
1918 break;
1919 case 2:
5ce5944d 1920 stw_phys(&address_space_memory, addr, val);
67364150
MF
1921 break;
1922 case 4:
ab1da857 1923 stl_phys(&address_space_memory, addr, val);
67364150 1924 break;
1ec9b909
AK
1925 default: abort();
1926 }
6658ffb8
PB
1927}
1928
1ec9b909
AK
1929static const MemoryRegionOps watch_mem_ops = {
1930 .read = watch_mem_read,
1931 .write = watch_mem_write,
1932 .endianness = DEVICE_NATIVE_ENDIAN,
6658ffb8 1933};
6658ffb8 1934
a8170e5e 1935static uint64_t subpage_read(void *opaque, hwaddr addr,
70c68e44 1936 unsigned len)
db7b5426 1937{
acc9d80b 1938 subpage_t *subpage = opaque;
ff6cff75 1939 uint8_t buf[8];
791af8c8 1940
db7b5426 1941#if defined(DEBUG_SUBPAGE)
016e9d62 1942 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
acc9d80b 1943 subpage, len, addr);
db7b5426 1944#endif
acc9d80b
JK
1945 address_space_read(subpage->as, addr + subpage->base, buf, len);
1946 switch (len) {
1947 case 1:
1948 return ldub_p(buf);
1949 case 2:
1950 return lduw_p(buf);
1951 case 4:
1952 return ldl_p(buf);
ff6cff75
PB
1953 case 8:
1954 return ldq_p(buf);
acc9d80b
JK
1955 default:
1956 abort();
1957 }
db7b5426
BS
1958}
1959
a8170e5e 1960static void subpage_write(void *opaque, hwaddr addr,
70c68e44 1961 uint64_t value, unsigned len)
db7b5426 1962{
acc9d80b 1963 subpage_t *subpage = opaque;
ff6cff75 1964 uint8_t buf[8];
acc9d80b 1965
db7b5426 1966#if defined(DEBUG_SUBPAGE)
016e9d62 1967 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
acc9d80b
JK
1968 " value %"PRIx64"\n",
1969 __func__, subpage, len, addr, value);
db7b5426 1970#endif
acc9d80b
JK
1971 switch (len) {
1972 case 1:
1973 stb_p(buf, value);
1974 break;
1975 case 2:
1976 stw_p(buf, value);
1977 break;
1978 case 4:
1979 stl_p(buf, value);
1980 break;
ff6cff75
PB
1981 case 8:
1982 stq_p(buf, value);
1983 break;
acc9d80b
JK
1984 default:
1985 abort();
1986 }
1987 address_space_write(subpage->as, addr + subpage->base, buf, len);
db7b5426
BS
1988}
1989
c353e4cc 1990static bool subpage_accepts(void *opaque, hwaddr addr,
016e9d62 1991 unsigned len, bool is_write)
c353e4cc 1992{
acc9d80b 1993 subpage_t *subpage = opaque;
c353e4cc 1994#if defined(DEBUG_SUBPAGE)
016e9d62 1995 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
acc9d80b 1996 __func__, subpage, is_write ? 'w' : 'r', len, addr);
c353e4cc
PB
1997#endif
1998
acc9d80b 1999 return address_space_access_valid(subpage->as, addr + subpage->base,
016e9d62 2000 len, is_write);
c353e4cc
PB
2001}
2002
70c68e44
AK
2003static const MemoryRegionOps subpage_ops = {
2004 .read = subpage_read,
2005 .write = subpage_write,
ff6cff75
PB
2006 .impl.min_access_size = 1,
2007 .impl.max_access_size = 8,
2008 .valid.min_access_size = 1,
2009 .valid.max_access_size = 8,
c353e4cc 2010 .valid.accepts = subpage_accepts,
70c68e44 2011 .endianness = DEVICE_NATIVE_ENDIAN,
db7b5426
BS
2012};
2013
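/*
 * Background note (summarising the ops above, not part of the original
 * file): a subpage_t stands in for a guest page that is split between
 * several MemoryRegionSections, e.g. when a small MMIO region does not
 * fill a whole TARGET_PAGE_SIZE page.  Accesses are simply re-dispatched
 * through the owning AddressSpace at subpage->base + addr, so each
 * sub-range still reaches its own section.
 */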
c227f099 2014static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
5312bd8b 2015 uint16_t section)
db7b5426
BS
2016{
2017 int idx, eidx;
2018
2019 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2020 return -1;
2021 idx = SUBPAGE_IDX(start);
2022 eidx = SUBPAGE_IDX(end);
2023#if defined(DEBUG_SUBPAGE)
016e9d62
AK
2024 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2025 __func__, mmio, start, end, idx, eidx, section);
db7b5426 2026#endif
db7b5426 2027 for (; idx <= eidx; idx++) {
5312bd8b 2028 mmio->sub_section[idx] = section;
db7b5426
BS
2029 }
2030
2031 return 0;
2032}
2033
acc9d80b 2034static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
db7b5426 2035{
c227f099 2036 subpage_t *mmio;
db7b5426 2037
7267c094 2038 mmio = g_malloc0(sizeof(subpage_t));
1eec614b 2039
acc9d80b 2040 mmio->as = as;
1eec614b 2041 mmio->base = base;
2c9b15ca 2042 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
b4fefef9 2043 NULL, TARGET_PAGE_SIZE);
b3b00c78 2044 mmio->iomem.subpage = true;
db7b5426 2045#if defined(DEBUG_SUBPAGE)
016e9d62
AK
2046 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2047 mmio, base, TARGET_PAGE_SIZE);
db7b5426 2048#endif
b41aac4f 2049 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
db7b5426
BS
2050
2051 return mmio;
2052}
2053
a656e22f
PC
2054static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2055 MemoryRegion *mr)
5312bd8b 2056{
a656e22f 2057 assert(as);
5312bd8b 2058 MemoryRegionSection section = {
a656e22f 2059 .address_space = as,
5312bd8b
AK
2060 .mr = mr,
2061 .offset_within_address_space = 0,
2062 .offset_within_region = 0,
052e87b0 2063 .size = int128_2_64(),
5312bd8b
AK
2064 };
2065
53cb28cb 2066 return phys_section_add(map, &section);
5312bd8b
AK
2067}
2068
9d82b5a7 2069MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
aa102231 2070{
79e2b9ae
PB
2071 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2072 MemoryRegionSection *sections = d->map.sections;
9d82b5a7
PB
2073
2074 return sections[index & ~TARGET_PAGE_MASK].mr;
aa102231
AK
2075}
2076
e9179ce1
AK
2077static void io_mem_init(void)
2078{
1f6245e5 2079 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2c9b15ca 2080 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1f6245e5 2081 NULL, UINT64_MAX);
2c9b15ca 2082 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1f6245e5 2083 NULL, UINT64_MAX);
2c9b15ca 2084 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1f6245e5 2085 NULL, UINT64_MAX);
e9179ce1
AK
2086}
2087
ac1970fb 2088static void mem_begin(MemoryListener *listener)
00752703
PB
2089{
2090 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
53cb28cb
MA
2091 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2092 uint16_t n;
2093
a656e22f 2094 n = dummy_section(&d->map, as, &io_mem_unassigned);
53cb28cb 2095 assert(n == PHYS_SECTION_UNASSIGNED);
a656e22f 2096 n = dummy_section(&d->map, as, &io_mem_notdirty);
53cb28cb 2097 assert(n == PHYS_SECTION_NOTDIRTY);
a656e22f 2098 n = dummy_section(&d->map, as, &io_mem_rom);
53cb28cb 2099 assert(n == PHYS_SECTION_ROM);
a656e22f 2100 n = dummy_section(&d->map, as, &io_mem_watch);
53cb28cb 2101 assert(n == PHYS_SECTION_WATCH);
00752703 2102
9736e55b 2103 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
00752703
PB
2104 d->as = as;
2105 as->next_dispatch = d;
2106}
2107
79e2b9ae
PB
2108static void address_space_dispatch_free(AddressSpaceDispatch *d)
2109{
2110 phys_sections_free(&d->map);
2111 g_free(d);
2112}
2113
00752703 2114static void mem_commit(MemoryListener *listener)
ac1970fb 2115{
89ae337a 2116 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
0475d94f
PB
2117 AddressSpaceDispatch *cur = as->dispatch;
2118 AddressSpaceDispatch *next = as->next_dispatch;
2119
53cb28cb 2120 phys_page_compact_all(next, next->map.nodes_nb);
b35ba30f 2121
79e2b9ae 2122 atomic_rcu_set(&as->dispatch, next);
53cb28cb 2123 if (cur) {
79e2b9ae 2124 call_rcu(cur, address_space_dispatch_free, rcu);
53cb28cb 2125 }
9affd6fc
PB
2126}
2127
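/*
 * Commentary (not in the original source): mem_begin() builds a fresh
 * AddressSpaceDispatch on the side, mem_commit() publishes it with
 * atomic_rcu_set(), and the previous dispatch is only freed after a grace
 * period via call_rcu(), so readers that dereference as->dispatch inside
 * an RCU read-side critical section never see a freed radix tree.
 */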
1d71148e 2128static void tcg_commit(MemoryListener *listener)
50c1e149 2129{
182735ef 2130 CPUState *cpu;
117712c3
AK
2131
2132 /* since each CPU stores ram addresses in its TLB cache, we must
2133 reset the modified entries */
2134 /* XXX: slow ! */
bdc44640 2135 CPU_FOREACH(cpu) {
33bde2e1
EI
 2136        /* FIXME: Disentangle the cpu.h circular file dependencies so
 2137           we can directly get the right CPU from the listener. */
2138 if (cpu->tcg_as_listener != listener) {
2139 continue;
2140 }
76e5c76f 2141 cpu_reload_memory_map(cpu);
117712c3 2142 }
50c1e149
AK
2143}
2144
93632747
AK
2145static void core_log_global_start(MemoryListener *listener)
2146{
981fdf23 2147 cpu_physical_memory_set_dirty_tracking(true);
93632747
AK
2148}
2149
2150static void core_log_global_stop(MemoryListener *listener)
2151{
981fdf23 2152 cpu_physical_memory_set_dirty_tracking(false);
93632747
AK
2153}
2154
93632747 2155static MemoryListener core_memory_listener = {
93632747
AK
2156 .log_global_start = core_log_global_start,
2157 .log_global_stop = core_log_global_stop,
ac1970fb 2158 .priority = 1,
93632747
AK
2159};
2160
ac1970fb
AK
2161void address_space_init_dispatch(AddressSpace *as)
2162{
00752703 2163 as->dispatch = NULL;
89ae337a 2164 as->dispatch_listener = (MemoryListener) {
ac1970fb 2165 .begin = mem_begin,
00752703 2166 .commit = mem_commit,
ac1970fb
AK
2167 .region_add = mem_add,
2168 .region_nop = mem_add,
2169 .priority = 0,
2170 };
89ae337a 2171 memory_listener_register(&as->dispatch_listener, as);
ac1970fb
AK
2172}
2173
6e48e8f9
PB
2174void address_space_unregister(AddressSpace *as)
2175{
2176 memory_listener_unregister(&as->dispatch_listener);
2177}
2178
83f3c251
AK
2179void address_space_destroy_dispatch(AddressSpace *as)
2180{
2181 AddressSpaceDispatch *d = as->dispatch;
2182
79e2b9ae
PB
2183 atomic_rcu_set(&as->dispatch, NULL);
2184 if (d) {
2185 call_rcu(d, address_space_dispatch_free, rcu);
2186 }
83f3c251
AK
2187}
2188
62152b8a
AK
2189static void memory_map_init(void)
2190{
7267c094 2191 system_memory = g_malloc(sizeof(*system_memory));
03f49957 2192
57271d63 2193 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
7dca8043 2194 address_space_init(&address_space_memory, system_memory, "memory");
309cb471 2195
7267c094 2196 system_io = g_malloc(sizeof(*system_io));
3bb28b72
JK
2197 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2198 65536);
7dca8043 2199 address_space_init(&address_space_io, system_io, "I/O");
93632747 2200
f6790af6 2201 memory_listener_register(&core_memory_listener, &address_space_memory);
62152b8a
AK
2202}
2203
2204MemoryRegion *get_system_memory(void)
2205{
2206 return system_memory;
2207}
2208
309cb471
AK
2209MemoryRegion *get_system_io(void)
2210{
2211 return system_io;
2212}
2213
e2eef170
PB
2214#endif /* !defined(CONFIG_USER_ONLY) */
2215
13eb76e0
FB
2216/* physical memory access (slow version, mainly for debug) */
2217#if defined(CONFIG_USER_ONLY)
f17ec444 2218int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
a68fe89c 2219 uint8_t *buf, int len, int is_write)
13eb76e0
FB
2220{
2221 int l, flags;
2222 target_ulong page;
53a5960a 2223 void * p;
13eb76e0
FB
2224
2225 while (len > 0) {
2226 page = addr & TARGET_PAGE_MASK;
2227 l = (page + TARGET_PAGE_SIZE) - addr;
2228 if (l > len)
2229 l = len;
2230 flags = page_get_flags(page);
2231 if (!(flags & PAGE_VALID))
a68fe89c 2232 return -1;
13eb76e0
FB
2233 if (is_write) {
2234 if (!(flags & PAGE_WRITE))
a68fe89c 2235 return -1;
579a97f7 2236 /* XXX: this code should not depend on lock_user */
72fb7daa 2237 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
a68fe89c 2238 return -1;
72fb7daa
AJ
2239 memcpy(p, buf, l);
2240 unlock_user(p, addr, l);
13eb76e0
FB
2241 } else {
2242 if (!(flags & PAGE_READ))
a68fe89c 2243 return -1;
579a97f7 2244 /* XXX: this code should not depend on lock_user */
72fb7daa 2245 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
a68fe89c 2246 return -1;
72fb7daa 2247 memcpy(buf, p, l);
5b257578 2248 unlock_user(p, addr, 0);
13eb76e0
FB
2249 }
2250 len -= l;
2251 buf += l;
2252 addr += l;
2253 }
a68fe89c 2254 return 0;
13eb76e0 2255}
8df1cd07 2256
13eb76e0 2257#else
51d7a9eb 2258
a8170e5e
AK
2259static void invalidate_and_set_dirty(hwaddr addr,
2260 hwaddr length)
51d7a9eb 2261{
f874bf90
PM
2262 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2263 tb_invalidate_phys_range(addr, addr + length, 0);
6886867e 2264 cpu_physical_memory_set_dirty_range_nocode(addr, length);
51d7a9eb 2265 }
e226939d 2266 xen_modified_memory(addr, length);
51d7a9eb
AP
2267}
2268
23326164 2269static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
82f2563f 2270{
e1622f4b 2271 unsigned access_size_max = mr->ops->valid.max_access_size;
23326164
RH
2272
2273 /* Regions are assumed to support 1-4 byte accesses unless
2274 otherwise specified. */
23326164
RH
2275 if (access_size_max == 0) {
2276 access_size_max = 4;
2277 }
2278
2279 /* Bound the maximum access by the alignment of the address. */
2280 if (!mr->ops->impl.unaligned) {
2281 unsigned align_size_max = addr & -addr;
2282 if (align_size_max != 0 && align_size_max < access_size_max) {
2283 access_size_max = align_size_max;
2284 }
82f2563f 2285 }
23326164
RH
2286
2287 /* Don't attempt accesses larger than the maximum. */
2288 if (l > access_size_max) {
2289 l = access_size_max;
82f2563f 2290 }
098178f2
PB
2291 if (l & (l - 1)) {
2292 l = 1 << (qemu_fls(l) - 1);
2293 }
23326164
RH
2294
2295 return l;
82f2563f
PB
2296}
2297
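/*
 * Worked example (illustrative, not from the original file): for a region
 * with .valid.max_access_size == 4 and no unaligned support, an 8-byte
 * request at address 0x1002 is first capped to 4 by the region and then
 * to 2 by the alignment test (0x1002 & -0x1002 == 2), so the caller
 * issues a 2-byte transaction and loops for the remaining bytes.
 */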
fd8aaa76 2298bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
ac1970fb 2299 int len, bool is_write)
13eb76e0 2300{
149f54b5 2301 hwaddr l;
13eb76e0 2302 uint8_t *ptr;
791af8c8 2303 uint64_t val;
149f54b5 2304 hwaddr addr1;
5c8a00ce 2305 MemoryRegion *mr;
fd8aaa76 2306 bool error = false;
3b46e624 2307
13eb76e0 2308 while (len > 0) {
149f54b5 2309 l = len;
5c8a00ce 2310 mr = address_space_translate(as, addr, &addr1, &l, is_write);
3b46e624 2311
13eb76e0 2312 if (is_write) {
5c8a00ce
PB
2313 if (!memory_access_is_direct(mr, is_write)) {
2314 l = memory_access_size(mr, l, addr1);
4917cf44 2315 /* XXX: could force current_cpu to NULL to avoid
6a00d601 2316 potential bugs */
23326164
RH
2317 switch (l) {
2318 case 8:
2319 /* 64 bit write access */
2320 val = ldq_p(buf);
2321 error |= io_mem_write(mr, addr1, val, 8);
2322 break;
2323 case 4:
1c213d19 2324 /* 32 bit write access */
c27004ec 2325 val = ldl_p(buf);
5c8a00ce 2326 error |= io_mem_write(mr, addr1, val, 4);
23326164
RH
2327 break;
2328 case 2:
1c213d19 2329 /* 16 bit write access */
c27004ec 2330 val = lduw_p(buf);
5c8a00ce 2331 error |= io_mem_write(mr, addr1, val, 2);
23326164
RH
2332 break;
2333 case 1:
1c213d19 2334 /* 8 bit write access */
c27004ec 2335 val = ldub_p(buf);
5c8a00ce 2336 error |= io_mem_write(mr, addr1, val, 1);
23326164
RH
2337 break;
2338 default:
2339 abort();
13eb76e0 2340 }
2bbfa05d 2341 } else {
5c8a00ce 2342 addr1 += memory_region_get_ram_addr(mr);
13eb76e0 2343 /* RAM case */
5579c7f3 2344 ptr = qemu_get_ram_ptr(addr1);
13eb76e0 2345 memcpy(ptr, buf, l);
51d7a9eb 2346 invalidate_and_set_dirty(addr1, l);
13eb76e0
FB
2347 }
2348 } else {
5c8a00ce 2349 if (!memory_access_is_direct(mr, is_write)) {
13eb76e0 2350 /* I/O case */
5c8a00ce 2351 l = memory_access_size(mr, l, addr1);
23326164
RH
2352 switch (l) {
2353 case 8:
2354 /* 64 bit read access */
2355 error |= io_mem_read(mr, addr1, &val, 8);
2356 stq_p(buf, val);
2357 break;
2358 case 4:
13eb76e0 2359 /* 32 bit read access */
5c8a00ce 2360 error |= io_mem_read(mr, addr1, &val, 4);
c27004ec 2361 stl_p(buf, val);
23326164
RH
2362 break;
2363 case 2:
13eb76e0 2364 /* 16 bit read access */
5c8a00ce 2365 error |= io_mem_read(mr, addr1, &val, 2);
c27004ec 2366 stw_p(buf, val);
23326164
RH
2367 break;
2368 case 1:
1c213d19 2369 /* 8 bit read access */
5c8a00ce 2370 error |= io_mem_read(mr, addr1, &val, 1);
c27004ec 2371 stb_p(buf, val);
23326164
RH
2372 break;
2373 default:
2374 abort();
13eb76e0
FB
2375 }
2376 } else {
2377 /* RAM case */
5c8a00ce 2378 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
f3705d53 2379 memcpy(buf, ptr, l);
13eb76e0
FB
2380 }
2381 }
2382 len -= l;
2383 buf += l;
2384 addr += l;
2385 }
fd8aaa76
PB
2386
2387 return error;
13eb76e0 2388}
8df1cd07 2389
fd8aaa76 2390bool address_space_write(AddressSpace *as, hwaddr addr,
ac1970fb
AK
2391 const uint8_t *buf, int len)
2392{
fd8aaa76 2393 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
ac1970fb
AK
2394}
2395
fd8aaa76 2396bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
ac1970fb 2397{
fd8aaa76 2398 return address_space_rw(as, addr, buf, len, false);
ac1970fb
AK
2399}
2400
2401
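/*
 * Minimal usage sketch (the function name and error handling are
 * assumptions, not part of exec.c): address_space_write() returns true if
 * any part of the transfer hit an I/O region that reported an error.
 */
static void example_dma_to_guest(AddressSpace *as, hwaddr dst,
                                 const uint8_t *data, int len)
{
    if (address_space_write(as, dst, data, len)) {
        fprintf(stderr, "write of %d bytes to " TARGET_FMT_plx " failed\n",
                len, dst);
    }
}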
a8170e5e 2402void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
ac1970fb
AK
2403 int len, int is_write)
2404{
fd8aaa76 2405 address_space_rw(&address_space_memory, addr, buf, len, is_write);
ac1970fb
AK
2406}
2407
582b55a9
AG
2408enum write_rom_type {
2409 WRITE_DATA,
2410 FLUSH_CACHE,
2411};
2412
2a221651 2413static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
582b55a9 2414 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
d0ecd2aa 2415{
149f54b5 2416 hwaddr l;
d0ecd2aa 2417 uint8_t *ptr;
149f54b5 2418 hwaddr addr1;
5c8a00ce 2419 MemoryRegion *mr;
3b46e624 2420
d0ecd2aa 2421 while (len > 0) {
149f54b5 2422 l = len;
2a221651 2423 mr = address_space_translate(as, addr, &addr1, &l, true);
3b46e624 2424
5c8a00ce
PB
2425 if (!(memory_region_is_ram(mr) ||
2426 memory_region_is_romd(mr))) {
d0ecd2aa
FB
2427 /* do nothing */
2428 } else {
5c8a00ce 2429 addr1 += memory_region_get_ram_addr(mr);
d0ecd2aa 2430 /* ROM/RAM case */
5579c7f3 2431 ptr = qemu_get_ram_ptr(addr1);
582b55a9
AG
2432 switch (type) {
2433 case WRITE_DATA:
2434 memcpy(ptr, buf, l);
2435 invalidate_and_set_dirty(addr1, l);
2436 break;
2437 case FLUSH_CACHE:
2438 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2439 break;
2440 }
d0ecd2aa
FB
2441 }
2442 len -= l;
2443 buf += l;
2444 addr += l;
2445 }
2446}
2447
582b55a9 2448/* used for ROM loading : can write in RAM and ROM */
2a221651 2449void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
582b55a9
AG
2450 const uint8_t *buf, int len)
2451{
2a221651 2452 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
582b55a9
AG
2453}
2454
2455void cpu_flush_icache_range(hwaddr start, int len)
2456{
2457 /*
2458 * This function should do the same thing as an icache flush that was
2459 * triggered from within the guest. For TCG we are always cache coherent,
2460 * so there is no need to flush anything. For KVM / Xen we need to flush
2461 * the host's instruction cache at least.
2462 */
2463 if (tcg_enabled()) {
2464 return;
2465 }
2466
2a221651
EI
2467 cpu_physical_memory_write_rom_internal(&address_space_memory,
2468 start, NULL, len, FLUSH_CACHE);
582b55a9
AG
2469}
2470
6d16c2f8 2471typedef struct {
d3e71559 2472 MemoryRegion *mr;
6d16c2f8 2473 void *buffer;
a8170e5e
AK
2474 hwaddr addr;
2475 hwaddr len;
c2cba0ff 2476 bool in_use;
6d16c2f8
AL
2477} BounceBuffer;
2478
2479static BounceBuffer bounce;
2480
ba223c29 2481typedef struct MapClient {
e95205e1 2482 QEMUBH *bh;
72cf2d4f 2483 QLIST_ENTRY(MapClient) link;
ba223c29
AL
2484} MapClient;
2485
38e047b5 2486QemuMutex map_client_list_lock;
72cf2d4f
BS
2487static QLIST_HEAD(map_client_list, MapClient) map_client_list
2488 = QLIST_HEAD_INITIALIZER(map_client_list);
ba223c29 2489
e95205e1
FZ
2490static void cpu_unregister_map_client_do(MapClient *client)
2491{
2492 QLIST_REMOVE(client, link);
2493 g_free(client);
2494}
2495
33b6c2ed
FZ
2496static void cpu_notify_map_clients_locked(void)
2497{
2498 MapClient *client;
2499
2500 while (!QLIST_EMPTY(&map_client_list)) {
2501 client = QLIST_FIRST(&map_client_list);
e95205e1
FZ
2502 qemu_bh_schedule(client->bh);
2503 cpu_unregister_map_client_do(client);
33b6c2ed
FZ
2504 }
2505}
2506
e95205e1 2507void cpu_register_map_client(QEMUBH *bh)
ba223c29 2508{
7267c094 2509 MapClient *client = g_malloc(sizeof(*client));
ba223c29 2510
38e047b5 2511 qemu_mutex_lock(&map_client_list_lock);
e95205e1 2512 client->bh = bh;
72cf2d4f 2513 QLIST_INSERT_HEAD(&map_client_list, client, link);
33b6c2ed
FZ
2514 if (!atomic_read(&bounce.in_use)) {
2515 cpu_notify_map_clients_locked();
2516 }
38e047b5 2517 qemu_mutex_unlock(&map_client_list_lock);
ba223c29
AL
2518}
2519
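/*
 * Sketch of the registration side (names are assumptions, not from this
 * file): a caller whose address_space_map() failed because the bounce
 * buffer was busy can ask for a bottom half to be scheduled once the
 * buffer is released, and retry the mapping from there.
 */
static void example_queue_map_retry(QEMUBHFunc *retry_cb, void *opaque)
{
    QEMUBH *bh = qemu_bh_new(retry_cb, opaque);

    cpu_register_map_client(bh);
}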
38e047b5
FZ
2520void cpu_exec_init_all(void)
2521{
2522 qemu_mutex_init(&ram_list.mutex);
2523 memory_map_init();
2524 io_mem_init();
2525 qemu_mutex_init(&map_client_list_lock);
2526}
2527
e95205e1 2528void cpu_unregister_map_client(QEMUBH *bh)
ba223c29 2529{
e95205e1 2530 MapClient *client;
ba223c29 2531
e95205e1
FZ
2532 qemu_mutex_lock(&map_client_list_lock);
2533 QLIST_FOREACH(client, &map_client_list, link) {
2534 if (client->bh == bh) {
2535 cpu_unregister_map_client_do(client);
2536 break;
2537 }
2538 }
2539 qemu_mutex_unlock(&map_client_list_lock);
ba223c29
AL
2540}
2541
2542static void cpu_notify_map_clients(void)
2543{
38e047b5 2544 qemu_mutex_lock(&map_client_list_lock);
33b6c2ed 2545 cpu_notify_map_clients_locked();
38e047b5 2546 qemu_mutex_unlock(&map_client_list_lock);
ba223c29
AL
2547}
2548
51644ab7
PB
2549bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2550{
5c8a00ce 2551 MemoryRegion *mr;
51644ab7
PB
2552 hwaddr l, xlat;
2553
2554 while (len > 0) {
2555 l = len;
5c8a00ce
PB
2556 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2557 if (!memory_access_is_direct(mr, is_write)) {
2558 l = memory_access_size(mr, l, addr);
2559 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
51644ab7
PB
2560 return false;
2561 }
2562 }
2563
2564 len -= l;
2565 addr += l;
2566 }
2567 return true;
2568}
2569
6d16c2f8
AL
2570/* Map a physical memory region into a host virtual address.
2571 * May map a subset of the requested range, given by and returned in *plen.
2572 * May return NULL if resources needed to perform the mapping are exhausted.
2573 * Use only for reads OR writes - not for read-modify-write operations.
ba223c29
AL
2574 * Use cpu_register_map_client() to know when retrying the map operation is
2575 * likely to succeed.
6d16c2f8 2576 */
ac1970fb 2577void *address_space_map(AddressSpace *as,
a8170e5e
AK
2578 hwaddr addr,
2579 hwaddr *plen,
ac1970fb 2580 bool is_write)
6d16c2f8 2581{
a8170e5e 2582 hwaddr len = *plen;
e3127ae0
PB
2583 hwaddr done = 0;
2584 hwaddr l, xlat, base;
2585 MemoryRegion *mr, *this_mr;
2586 ram_addr_t raddr;
6d16c2f8 2587
e3127ae0
PB
2588 if (len == 0) {
2589 return NULL;
2590 }
38bee5dc 2591
e3127ae0
PB
2592 l = len;
2593 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2594 if (!memory_access_is_direct(mr, is_write)) {
c2cba0ff 2595 if (atomic_xchg(&bounce.in_use, true)) {
e3127ae0 2596 return NULL;
6d16c2f8 2597 }
e85d9db5
KW
2598 /* Avoid unbounded allocations */
2599 l = MIN(l, TARGET_PAGE_SIZE);
2600 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
e3127ae0
PB
2601 bounce.addr = addr;
2602 bounce.len = l;
d3e71559
PB
2603
2604 memory_region_ref(mr);
2605 bounce.mr = mr;
e3127ae0
PB
2606 if (!is_write) {
2607 address_space_read(as, addr, bounce.buffer, l);
8ab934f9 2608 }
6d16c2f8 2609
e3127ae0
PB
2610 *plen = l;
2611 return bounce.buffer;
2612 }
2613
2614 base = xlat;
2615 raddr = memory_region_get_ram_addr(mr);
2616
2617 for (;;) {
6d16c2f8
AL
2618 len -= l;
2619 addr += l;
e3127ae0
PB
2620 done += l;
2621 if (len == 0) {
2622 break;
2623 }
2624
2625 l = len;
2626 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2627 if (this_mr != mr || xlat != base + done) {
2628 break;
2629 }
6d16c2f8 2630 }
e3127ae0 2631
d3e71559 2632 memory_region_ref(mr);
e3127ae0
PB
2633 *plen = done;
2634 return qemu_ram_ptr_length(raddr + base, plen);
6d16c2f8
AL
2635}
2636
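/*
 * Illustrative read-side pairing (a sketch; the helper name and the silent
 * early return are assumptions): the mapped length may be smaller than
 * requested, and the pointer must be released with address_space_unmap()
 * so bounce buffers and dirty tracking are handled.
 */
static void example_peek_guest_region(AddressSpace *as, hwaddr addr,
                                      uint8_t *out, hwaddr len)
{
    hwaddr mapped = len;
    void *p = address_space_map(as, addr, &mapped, false);

    if (!p) {
        /* Resources exhausted (e.g. the bounce buffer is in use);
         * cpu_register_map_client() can be used to retry later. */
        return;
    }
    memcpy(out, p, mapped);
    address_space_unmap(as, p, mapped, false, mapped);
}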
ac1970fb 2637/* Unmaps a memory region previously mapped by address_space_map().
6d16c2f8
AL
2638 * Will also mark the memory as dirty if is_write == 1. access_len gives
2639 * the amount of memory that was actually read or written by the caller.
2640 */
a8170e5e
AK
2641void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2642 int is_write, hwaddr access_len)
6d16c2f8
AL
2643{
2644 if (buffer != bounce.buffer) {
d3e71559
PB
2645 MemoryRegion *mr;
2646 ram_addr_t addr1;
2647
2648 mr = qemu_ram_addr_from_host(buffer, &addr1);
2649 assert(mr != NULL);
6d16c2f8 2650 if (is_write) {
6886867e 2651 invalidate_and_set_dirty(addr1, access_len);
6d16c2f8 2652 }
868bb33f 2653 if (xen_enabled()) {
e41d7c69 2654 xen_invalidate_map_cache_entry(buffer);
050a0ddf 2655 }
d3e71559 2656 memory_region_unref(mr);
6d16c2f8
AL
2657 return;
2658 }
2659 if (is_write) {
ac1970fb 2660 address_space_write(as, bounce.addr, bounce.buffer, access_len);
6d16c2f8 2661 }
f8a83245 2662 qemu_vfree(bounce.buffer);
6d16c2f8 2663 bounce.buffer = NULL;
d3e71559 2664 memory_region_unref(bounce.mr);
c2cba0ff 2665 atomic_mb_set(&bounce.in_use, false);
ba223c29 2666 cpu_notify_map_clients();
6d16c2f8 2667}
d0ecd2aa 2668
a8170e5e
AK
2669void *cpu_physical_memory_map(hwaddr addr,
2670 hwaddr *plen,
ac1970fb
AK
2671 int is_write)
2672{
2673 return address_space_map(&address_space_memory, addr, plen, is_write);
2674}
2675
a8170e5e
AK
2676void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2677 int is_write, hwaddr access_len)
ac1970fb
AK
2678{
2679 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2680}
2681
8df1cd07 2682/* warning: addr must be aligned */
fdfba1a2 2683static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
1e78bcc1 2684 enum device_endian endian)
8df1cd07 2685{
8df1cd07 2686 uint8_t *ptr;
791af8c8 2687 uint64_t val;
5c8a00ce 2688 MemoryRegion *mr;
149f54b5
PB
2689 hwaddr l = 4;
2690 hwaddr addr1;
8df1cd07 2691
fdfba1a2 2692 mr = address_space_translate(as, addr, &addr1, &l, false);
5c8a00ce 2693 if (l < 4 || !memory_access_is_direct(mr, false)) {
8df1cd07 2694 /* I/O case */
5c8a00ce 2695 io_mem_read(mr, addr1, &val, 4);
1e78bcc1
AG
2696#if defined(TARGET_WORDS_BIGENDIAN)
2697 if (endian == DEVICE_LITTLE_ENDIAN) {
2698 val = bswap32(val);
2699 }
2700#else
2701 if (endian == DEVICE_BIG_ENDIAN) {
2702 val = bswap32(val);
2703 }
2704#endif
8df1cd07
FB
2705 } else {
2706 /* RAM case */
5c8a00ce 2707 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2708 & TARGET_PAGE_MASK)
149f54b5 2709 + addr1);
1e78bcc1
AG
2710 switch (endian) {
2711 case DEVICE_LITTLE_ENDIAN:
2712 val = ldl_le_p(ptr);
2713 break;
2714 case DEVICE_BIG_ENDIAN:
2715 val = ldl_be_p(ptr);
2716 break;
2717 default:
2718 val = ldl_p(ptr);
2719 break;
2720 }
8df1cd07
FB
2721 }
2722 return val;
2723}
2724
fdfba1a2 2725uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2726{
fdfba1a2 2727 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2728}
2729
fdfba1a2 2730uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2731{
fdfba1a2 2732 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2733}
2734
fdfba1a2 2735uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2736{
fdfba1a2 2737 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2738}
2739
84b7b8e7 2740/* warning: addr must be aligned */
2c17449b 2741static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
1e78bcc1 2742 enum device_endian endian)
84b7b8e7 2743{
84b7b8e7
FB
2744 uint8_t *ptr;
2745 uint64_t val;
5c8a00ce 2746 MemoryRegion *mr;
149f54b5
PB
2747 hwaddr l = 8;
2748 hwaddr addr1;
84b7b8e7 2749
2c17449b 2750 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2751 false);
2752 if (l < 8 || !memory_access_is_direct(mr, false)) {
84b7b8e7 2753 /* I/O case */
5c8a00ce 2754 io_mem_read(mr, addr1, &val, 8);
968a5627
PB
2755#if defined(TARGET_WORDS_BIGENDIAN)
2756 if (endian == DEVICE_LITTLE_ENDIAN) {
2757 val = bswap64(val);
2758 }
2759#else
2760 if (endian == DEVICE_BIG_ENDIAN) {
2761 val = bswap64(val);
2762 }
84b7b8e7
FB
2763#endif
2764 } else {
2765 /* RAM case */
5c8a00ce 2766 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2767 & TARGET_PAGE_MASK)
149f54b5 2768 + addr1);
1e78bcc1
AG
2769 switch (endian) {
2770 case DEVICE_LITTLE_ENDIAN:
2771 val = ldq_le_p(ptr);
2772 break;
2773 case DEVICE_BIG_ENDIAN:
2774 val = ldq_be_p(ptr);
2775 break;
2776 default:
2777 val = ldq_p(ptr);
2778 break;
2779 }
84b7b8e7
FB
2780 }
2781 return val;
2782}
2783
2c17449b 2784uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2785{
2c17449b 2786 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2787}
2788
2c17449b 2789uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2790{
2c17449b 2791 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2792}
2793
2c17449b 2794uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2795{
2c17449b 2796 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2797}
2798
aab33094 2799/* XXX: optimize */
2c17449b 2800uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
aab33094
FB
2801{
2802 uint8_t val;
2c17449b 2803 address_space_rw(as, addr, &val, 1, 0);
aab33094
FB
2804 return val;
2805}
2806
733f0b02 2807/* warning: addr must be aligned */
41701aa4 2808static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
1e78bcc1 2809 enum device_endian endian)
aab33094 2810{
733f0b02
MT
2811 uint8_t *ptr;
2812 uint64_t val;
5c8a00ce 2813 MemoryRegion *mr;
149f54b5
PB
2814 hwaddr l = 2;
2815 hwaddr addr1;
733f0b02 2816
41701aa4 2817 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2818 false);
2819 if (l < 2 || !memory_access_is_direct(mr, false)) {
733f0b02 2820 /* I/O case */
5c8a00ce 2821 io_mem_read(mr, addr1, &val, 2);
1e78bcc1
AG
2822#if defined(TARGET_WORDS_BIGENDIAN)
2823 if (endian == DEVICE_LITTLE_ENDIAN) {
2824 val = bswap16(val);
2825 }
2826#else
2827 if (endian == DEVICE_BIG_ENDIAN) {
2828 val = bswap16(val);
2829 }
2830#endif
733f0b02
MT
2831 } else {
2832 /* RAM case */
5c8a00ce 2833 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2834 & TARGET_PAGE_MASK)
149f54b5 2835 + addr1);
1e78bcc1
AG
2836 switch (endian) {
2837 case DEVICE_LITTLE_ENDIAN:
2838 val = lduw_le_p(ptr);
2839 break;
2840 case DEVICE_BIG_ENDIAN:
2841 val = lduw_be_p(ptr);
2842 break;
2843 default:
2844 val = lduw_p(ptr);
2845 break;
2846 }
733f0b02
MT
2847 }
2848 return val;
aab33094
FB
2849}
2850
41701aa4 2851uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2852{
41701aa4 2853 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2854}
2855
41701aa4 2856uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2857{
41701aa4 2858 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2859}
2860
41701aa4 2861uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2862{
41701aa4 2863 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2864}
2865
8df1cd07
FB
 2866/* warning: addr must be aligned. The ram page is not marked as dirty
2867 and the code inside is not invalidated. It is useful if the dirty
2868 bits are used to track modified PTEs */
2198a121 2869void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
8df1cd07 2870{
8df1cd07 2871 uint8_t *ptr;
5c8a00ce 2872 MemoryRegion *mr;
149f54b5
PB
2873 hwaddr l = 4;
2874 hwaddr addr1;
8df1cd07 2875
2198a121 2876 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2877 true);
2878 if (l < 4 || !memory_access_is_direct(mr, true)) {
2879 io_mem_write(mr, addr1, val, 4);
8df1cd07 2880 } else {
5c8a00ce 2881 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
5579c7f3 2882 ptr = qemu_get_ram_ptr(addr1);
8df1cd07 2883 stl_p(ptr, val);
74576198
AL
2884
2885 if (unlikely(in_migration)) {
a2cd8c85 2886 if (cpu_physical_memory_is_clean(addr1)) {
74576198
AL
2887 /* invalidate code */
2888 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2889 /* set dirty bit */
6886867e 2890 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
74576198
AL
2891 }
2892 }
8df1cd07
FB
2893 }
2894}
2895
2896/* warning: addr must be aligned */
ab1da857
EI
2897static inline void stl_phys_internal(AddressSpace *as,
2898 hwaddr addr, uint32_t val,
1e78bcc1 2899 enum device_endian endian)
8df1cd07 2900{
8df1cd07 2901 uint8_t *ptr;
5c8a00ce 2902 MemoryRegion *mr;
149f54b5
PB
2903 hwaddr l = 4;
2904 hwaddr addr1;
8df1cd07 2905
ab1da857 2906 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2907 true);
2908 if (l < 4 || !memory_access_is_direct(mr, true)) {
1e78bcc1
AG
2909#if defined(TARGET_WORDS_BIGENDIAN)
2910 if (endian == DEVICE_LITTLE_ENDIAN) {
2911 val = bswap32(val);
2912 }
2913#else
2914 if (endian == DEVICE_BIG_ENDIAN) {
2915 val = bswap32(val);
2916 }
2917#endif
5c8a00ce 2918 io_mem_write(mr, addr1, val, 4);
8df1cd07 2919 } else {
8df1cd07 2920 /* RAM case */
5c8a00ce 2921 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
5579c7f3 2922 ptr = qemu_get_ram_ptr(addr1);
1e78bcc1
AG
2923 switch (endian) {
2924 case DEVICE_LITTLE_ENDIAN:
2925 stl_le_p(ptr, val);
2926 break;
2927 case DEVICE_BIG_ENDIAN:
2928 stl_be_p(ptr, val);
2929 break;
2930 default:
2931 stl_p(ptr, val);
2932 break;
2933 }
51d7a9eb 2934 invalidate_and_set_dirty(addr1, 4);
8df1cd07
FB
2935 }
2936}
2937
ab1da857 2938void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2939{
ab1da857 2940 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2941}
2942
ab1da857 2943void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2944{
ab1da857 2945 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2946}
2947
ab1da857 2948void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2949{
ab1da857 2950 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2951}
2952
aab33094 2953/* XXX: optimize */
db3be60d 2954void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
aab33094
FB
2955{
2956 uint8_t v = val;
db3be60d 2957 address_space_rw(as, addr, &v, 1, 1);
aab33094
FB
2958}
2959
733f0b02 2960/* warning: addr must be aligned */
5ce5944d
EI
2961static inline void stw_phys_internal(AddressSpace *as,
2962 hwaddr addr, uint32_t val,
1e78bcc1 2963 enum device_endian endian)
aab33094 2964{
733f0b02 2965 uint8_t *ptr;
5c8a00ce 2966 MemoryRegion *mr;
149f54b5
PB
2967 hwaddr l = 2;
2968 hwaddr addr1;
733f0b02 2969
5ce5944d 2970 mr = address_space_translate(as, addr, &addr1, &l, true);
5c8a00ce 2971 if (l < 2 || !memory_access_is_direct(mr, true)) {
1e78bcc1
AG
2972#if defined(TARGET_WORDS_BIGENDIAN)
2973 if (endian == DEVICE_LITTLE_ENDIAN) {
2974 val = bswap16(val);
2975 }
2976#else
2977 if (endian == DEVICE_BIG_ENDIAN) {
2978 val = bswap16(val);
2979 }
2980#endif
5c8a00ce 2981 io_mem_write(mr, addr1, val, 2);
733f0b02 2982 } else {
733f0b02 2983 /* RAM case */
5c8a00ce 2984 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
733f0b02 2985 ptr = qemu_get_ram_ptr(addr1);
1e78bcc1
AG
2986 switch (endian) {
2987 case DEVICE_LITTLE_ENDIAN:
2988 stw_le_p(ptr, val);
2989 break;
2990 case DEVICE_BIG_ENDIAN:
2991 stw_be_p(ptr, val);
2992 break;
2993 default:
2994 stw_p(ptr, val);
2995 break;
2996 }
51d7a9eb 2997 invalidate_and_set_dirty(addr1, 2);
733f0b02 2998 }
aab33094
FB
2999}
3000
5ce5944d 3001void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 3002{
5ce5944d 3003 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
3004}
3005
5ce5944d 3006void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 3007{
5ce5944d 3008 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
3009}
3010
5ce5944d 3011void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 3012{
5ce5944d 3013 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
3014}
3015
aab33094 3016/* XXX: optimize */
f606604f 3017void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
aab33094
FB
3018{
3019 val = tswap64(val);
f606604f 3020 address_space_rw(as, addr, (void *) &val, 8, 1);
aab33094
FB
3021}
3022
f606604f 3023void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
1e78bcc1
AG
3024{
3025 val = cpu_to_le64(val);
f606604f 3026 address_space_rw(as, addr, (void *) &val, 8, 1);
1e78bcc1
AG
3027}
3028
f606604f 3029void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
1e78bcc1
AG
3030{
3031 val = cpu_to_be64(val);
f606604f 3032 address_space_rw(as, addr, (void *) &val, 8, 1);
1e78bcc1
AG
3033}
3034
5e2972fd 3035/* virtual memory access for debug (includes writing to ROM) */
f17ec444 3036int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
b448f2f3 3037 uint8_t *buf, int len, int is_write)
13eb76e0
FB
3038{
3039 int l;
a8170e5e 3040 hwaddr phys_addr;
9b3c35e0 3041 target_ulong page;
13eb76e0
FB
3042
3043 while (len > 0) {
3044 page = addr & TARGET_PAGE_MASK;
f17ec444 3045 phys_addr = cpu_get_phys_page_debug(cpu, page);
13eb76e0
FB
3046 /* if no physical page mapped, return an error */
3047 if (phys_addr == -1)
3048 return -1;
3049 l = (page + TARGET_PAGE_SIZE) - addr;
3050 if (l > len)
3051 l = len;
5e2972fd 3052 phys_addr += (addr & ~TARGET_PAGE_MASK);
2e38847b
EI
3053 if (is_write) {
3054 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3055 } else {
3056 address_space_rw(cpu->as, phys_addr, buf, l, 0);
3057 }
13eb76e0
FB
3058 len -= l;
3059 buf += l;
3060 addr += l;
3061 }
3062 return 0;
3063}
a68fe89c 3064#endif
13eb76e0 3065
8e4a424b
BS
3066/*
3067 * A helper function for the _utterly broken_ virtio device model to find out if
3068 * it's running on a big endian machine. Don't do this at home kids!
3069 */
98ed8ecf
GK
3070bool target_words_bigendian(void);
3071bool target_words_bigendian(void)
8e4a424b
BS
3072{
3073#if defined(TARGET_WORDS_BIGENDIAN)
3074 return true;
3075#else
3076 return false;
3077#endif
3078}
3079
76f35538 3080#ifndef CONFIG_USER_ONLY
a8170e5e 3081bool cpu_physical_memory_is_io(hwaddr phys_addr)
76f35538 3082{
5c8a00ce 3083 MemoryRegion*mr;
149f54b5 3084 hwaddr l = 1;
76f35538 3085
5c8a00ce
PB
3086 mr = address_space_translate(&address_space_memory,
3087 phys_addr, &phys_addr, &l, false);
76f35538 3088
5c8a00ce
PB
3089 return !(memory_region_is_ram(mr) ||
3090 memory_region_is_romd(mr));
76f35538 3091}
bd2fa51f
MH
3092
3093void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3094{
3095 RAMBlock *block;
3096
0dc3f44a
MD
3097 rcu_read_lock();
3098 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5 3099 func(block->host, block->offset, block->used_length, opaque);
bd2fa51f 3100 }
0dc3f44a 3101 rcu_read_unlock();
bd2fa51f 3102}
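/*
 * Illustrative iterator callback (the function name and printf output are
 * assumptions, not part of exec.c); it would be passed as
 * qemu_ram_foreach_block(example_dump_ram_block, NULL).
 */
static void example_dump_ram_block(void *host, ram_addr_t offset,
                                   ram_addr_t length, void *opaque)
{
    printf("ram block: host %p offset " RAM_ADDR_FMT
           " length " RAM_ADDR_FMT "\n", host, offset, length);
}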
ec3f8c99 3103#endif