]> git.proxmox.com Git - mirror_qemu.git/blame - exec.c
translate-all: use bitmap helpers for PageDesc's bitmap
[mirror_qemu.git] / exec.c
CommitLineData
54936004 1/*
5b6dd868 2 * Virtual page mapping
5fafdf24 3 *
54936004
FB
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
8167ee88 17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
54936004 18 */
67b915a5 19#include "config.h"
777872e5 20#ifndef _WIN32
a98d49b1 21#include <sys/types.h>
d5a8f07c
FB
22#include <sys/mman.h>
23#endif
54936004 24
055403b2 25#include "qemu-common.h"
6180a181 26#include "cpu.h"
b67d9a52 27#include "tcg.h"
b3c7724c 28#include "hw/hw.h"
4485bd26 29#if !defined(CONFIG_USER_ONLY)
47c8ca53 30#include "hw/boards.h"
4485bd26 31#endif
cc9e98cb 32#include "hw/qdev.h"
1de7afc9 33#include "qemu/osdep.h"
9c17d615 34#include "sysemu/kvm.h"
2ff3de68 35#include "sysemu/sysemu.h"
0d09e41a 36#include "hw/xen/xen.h"
1de7afc9
PB
37#include "qemu/timer.h"
38#include "qemu/config-file.h"
75a34036 39#include "qemu/error-report.h"
022c62cb 40#include "exec/memory.h"
9c17d615 41#include "sysemu/dma.h"
022c62cb 42#include "exec/address-spaces.h"
53a5960a
PB
43#if defined(CONFIG_USER_ONLY)
44#include <qemu.h>
432d268c 45#else /* !CONFIG_USER_ONLY */
9c17d615 46#include "sysemu/xen-mapcache.h"
6506e4f9 47#include "trace.h"
53a5960a 48#endif
0d6d3c87 49#include "exec/cpu-all.h"
0dc3f44a 50#include "qemu/rcu_queue.h"
022c62cb 51#include "exec/cputlb.h"
5b6dd868 52#include "translate-all.h"
0cac1b66 53
022c62cb 54#include "exec/memory-internal.h"
220c3ebd 55#include "exec/ram_addr.h"
67d95c15 56
b35ba30f
MT
57#include "qemu/range.h"
58
db7b5426 59//#define DEBUG_SUBPAGE
1196be37 60
e2eef170 61#if !defined(CONFIG_USER_ONLY)
981fdf23 62static bool in_migration;
94a6b54f 63
0dc3f44a
MD
64/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
65 * are protected by the ramlist lock.
66 */
0d53d9fe 67RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
62152b8a
AK
68
69static MemoryRegion *system_memory;
309cb471 70static MemoryRegion *system_io;
62152b8a 71
f6790af6
AK
72AddressSpace address_space_io;
73AddressSpace address_space_memory;
2673a5da 74
0844e007 75MemoryRegion io_mem_rom, io_mem_notdirty;
acc9d80b 76static MemoryRegion io_mem_unassigned;
0e0df1e2 77
7bd4f430
PB
78/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
79#define RAM_PREALLOC (1 << 0)
80
dbcb8981
PB
81/* RAM is mmap-ed with MAP_SHARED */
82#define RAM_SHARED (1 << 1)
83
62be4e3a
MT
84/* Only a portion of RAM (used_length) is actually used, and migrated.
85 * This used_length size can change across reboots.
86 */
87#define RAM_RESIZEABLE (1 << 2)
88
e2eef170 89#endif
9fa3e853 90
bdc44640 91struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
6a00d601
FB
92/* current CPU in the current thread. It is only valid inside
93 cpu_exec() */
4917cf44 94DEFINE_TLS(CPUState *, current_cpu);
2e70f6ef 95/* 0 = Do not count executed instructions.
bf20dc07 96 1 = Precise instruction counting.
2e70f6ef 97 2 = Adaptive rate instruction counting. */
5708fc66 98int use_icount;
6a00d601 99
e2eef170 100#if !defined(CONFIG_USER_ONLY)
4346ae3e 101
1db8abb1
PB
102typedef struct PhysPageEntry PhysPageEntry;
103
104struct PhysPageEntry {
9736e55b 105 /* How many bits skip to next level (in units of L2_SIZE). 0 for a leaf. */
8b795765 106 uint32_t skip : 6;
9736e55b 107 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
8b795765 108 uint32_t ptr : 26;
1db8abb1
PB
109};
110
8b795765
MT
111#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
112
03f49957 113/* Size of the L2 (and L3, etc) page tables. */
57271d63 114#define ADDR_SPACE_BITS 64
03f49957 115
026736ce 116#define P_L2_BITS 9
03f49957
PB
117#define P_L2_SIZE (1 << P_L2_BITS)
118
119#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
120
121typedef PhysPageEntry Node[P_L2_SIZE];
0475d94f 122
/*
 * Backing storage for one physical page radix tree: a growable array of
 * tree nodes plus a growable array of MemoryRegionSections.  The *_nb
 * fields count entries in use and the *_nb_alloc fields hold the
 * allocated capacity of the corresponding array.
 */
53cb28cb 123typedef struct PhysPageMap {
79e2b9ae
PB
124 struct rcu_head rcu;
125
53cb28cb
MA
126 unsigned sections_nb;
127 unsigned sections_nb_alloc;
128 unsigned nodes_nb;
129 unsigned nodes_nb_alloc;
130 Node *nodes;
131 MemoryRegionSection *sections;
132} PhysPageMap;
133
1db8abb1 134struct AddressSpaceDispatch {
79e2b9ae
PB
135 struct rcu_head rcu;
136
1db8abb1
PB
137 /* This is a multi-level map on the physical address space.
138 * The bottom level has pointers to MemoryRegionSections.
139 */
140 PhysPageEntry phys_map;
53cb28cb 141 PhysPageMap map;
acc9d80b 142 AddressSpace *as;
1db8abb1
PB
143};
144
90260c6c
JK
145#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/*
 * A page whose contents are split between several sections.  iomem is the
 * MemoryRegion embedded in the subpage (container_of() recovers the
 * subpage from it); sub_section[] is indexed with SUBPAGE_IDX(addr) and
 * holds section numbers used to index the dispatch map's sections array.
 */
146typedef struct subpage_t {
147 MemoryRegion iomem;
acc9d80b 148 AddressSpace *as;
90260c6c
JK
149 hwaddr base;
150 uint16_t sub_section[TARGET_PAGE_SIZE];
151} subpage_t;
152
b41aac4f
LPF
153#define PHYS_SECTION_UNASSIGNED 0
154#define PHYS_SECTION_NOTDIRTY 1
155#define PHYS_SECTION_ROM 2
156#define PHYS_SECTION_WATCH 3
5312bd8b 157
e2eef170 158static void io_mem_init(void);
62152b8a 159static void memory_map_init(void);
09daed84 160static void tcg_commit(MemoryListener *listener);
e2eef170 161
1ec9b909 162static MemoryRegion io_mem_watch;
6658ffb8 163#endif
fd6ce8f6 164
6d9a1304 165#if !defined(CONFIG_USER_ONLY)
d6f2ea22 166
53cb28cb 167static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
d6f2ea22 168{
53cb28cb
MA
169 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
170 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
171 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
172 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
d6f2ea22 173 }
f7bf5461
AK
174}
175
53cb28cb 176static uint32_t phys_map_node_alloc(PhysPageMap *map)
f7bf5461
AK
177{
178 unsigned i;
8b795765 179 uint32_t ret;
f7bf5461 180
53cb28cb 181 ret = map->nodes_nb++;
f7bf5461 182 assert(ret != PHYS_MAP_NODE_NIL);
53cb28cb 183 assert(ret != map->nodes_nb_alloc);
03f49957 184 for (i = 0; i < P_L2_SIZE; ++i) {
53cb28cb
MA
185 map->nodes[ret][i].skip = 1;
186 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
d6f2ea22 187 }
f7bf5461 188 return ret;
d6f2ea22
AK
189}
190
53cb28cb
MA
191static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
192 hwaddr *index, hwaddr *nb, uint16_t leaf,
2999097b 193 int level)
f7bf5461
AK
194{
195 PhysPageEntry *p;
196 int i;
03f49957 197 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
108c49b8 198
9736e55b 199 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
53cb28cb
MA
200 lp->ptr = phys_map_node_alloc(map);
201 p = map->nodes[lp->ptr];
f7bf5461 202 if (level == 0) {
03f49957 203 for (i = 0; i < P_L2_SIZE; i++) {
9736e55b 204 p[i].skip = 0;
b41aac4f 205 p[i].ptr = PHYS_SECTION_UNASSIGNED;
4346ae3e 206 }
67c4d23c 207 }
f7bf5461 208 } else {
53cb28cb 209 p = map->nodes[lp->ptr];
92e873b9 210 }
03f49957 211 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
f7bf5461 212
03f49957 213 while (*nb && lp < &p[P_L2_SIZE]) {
07f07b31 214 if ((*index & (step - 1)) == 0 && *nb >= step) {
9736e55b 215 lp->skip = 0;
c19e8800 216 lp->ptr = leaf;
07f07b31
AK
217 *index += step;
218 *nb -= step;
2999097b 219 } else {
53cb28cb 220 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
2999097b
AK
221 }
222 ++lp;
f7bf5461
AK
223 }
224}
225
ac1970fb 226static void phys_page_set(AddressSpaceDispatch *d,
a8170e5e 227 hwaddr index, hwaddr nb,
2999097b 228 uint16_t leaf)
f7bf5461 229{
2999097b 230 /* Wildly overreserve - it doesn't matter much. */
53cb28cb 231 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
5cd2c5b6 232
53cb28cb 233 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
92e873b9
FB
234}
235
b35ba30f
MT
236/* Compact a non leaf page entry. Simply detect that the entry has a single child,
237 * and update our entry so we can skip it and go directly to the destination.
238 */
239static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
240{
241 unsigned valid_ptr = P_L2_SIZE;
242 int valid = 0;
243 PhysPageEntry *p;
244 int i;
245
246 if (lp->ptr == PHYS_MAP_NODE_NIL) {
247 return;
248 }
249
250 p = nodes[lp->ptr];
251 for (i = 0; i < P_L2_SIZE; i++) {
252 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
253 continue;
254 }
255
256 valid_ptr = i;
257 valid++;
258 if (p[i].skip) {
259 phys_page_compact(&p[i], nodes, compacted);
260 }
261 }
262
263 /* We can only compress if there's only one child. */
264 if (valid != 1) {
265 return;
266 }
267
268 assert(valid_ptr < P_L2_SIZE);
269
270 /* Don't compress if it won't fit in the # of bits we have. */
271 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
272 return;
273 }
274
275 lp->ptr = p[valid_ptr].ptr;
276 if (!p[valid_ptr].skip) {
277 /* If our only child is a leaf, make this a leaf. */
278 /* By design, we should have made this node a leaf to begin with so we
279 * should never reach here.
280 * But since it's so simple to handle this, let's do it just in case we
281 * change this rule.
282 */
283 lp->skip = 0;
284 } else {
285 lp->skip += p[valid_ptr].skip;
286 }
287}
288
289static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
290{
291 DECLARE_BITMAP(compacted, nodes_nb);
292
293 if (d->phys_map.skip) {
53cb28cb 294 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
b35ba30f
MT
295 }
296}
297
97115a8d 298static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
9affd6fc 299 Node *nodes, MemoryRegionSection *sections)
92e873b9 300{
31ab2b4a 301 PhysPageEntry *p;
97115a8d 302 hwaddr index = addr >> TARGET_PAGE_BITS;
31ab2b4a 303 int i;
f1f6e3b8 304
9736e55b 305 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
c19e8800 306 if (lp.ptr == PHYS_MAP_NODE_NIL) {
9affd6fc 307 return &sections[PHYS_SECTION_UNASSIGNED];
31ab2b4a 308 }
9affd6fc 309 p = nodes[lp.ptr];
03f49957 310 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
5312bd8b 311 }
b35ba30f
MT
312
313 if (sections[lp.ptr].size.hi ||
314 range_covers_byte(sections[lp.ptr].offset_within_address_space,
315 sections[lp.ptr].size.lo, addr)) {
316 return &sections[lp.ptr];
317 } else {
318 return &sections[PHYS_SECTION_UNASSIGNED];
319 }
f3705d53
AK
320}
321
e5548617
BS
322bool memory_region_is_unassigned(MemoryRegion *mr)
323{
2a8e7499 324 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
5b6dd868 325 && mr != &io_mem_watch;
fd6ce8f6 326}
149f54b5 327
79e2b9ae 328/* Called from RCU critical section */
c7086b4a 329static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
90260c6c
JK
330 hwaddr addr,
331 bool resolve_subpage)
9f029603 332{
90260c6c
JK
333 MemoryRegionSection *section;
334 subpage_t *subpage;
335
53cb28cb 336 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
90260c6c
JK
337 if (resolve_subpage && section->mr->subpage) {
338 subpage = container_of(section->mr, subpage_t, iomem);
53cb28cb 339 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
90260c6c
JK
340 }
341 return section;
9f029603
JK
342}
343
79e2b9ae 344/* Called from RCU critical section */
90260c6c 345static MemoryRegionSection *
c7086b4a 346address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
90260c6c 347 hwaddr *plen, bool resolve_subpage)
149f54b5
PB
348{
349 MemoryRegionSection *section;
a87f3954 350 Int128 diff;
149f54b5 351
c7086b4a 352 section = address_space_lookup_region(d, addr, resolve_subpage);
149f54b5
PB
353 /* Compute offset within MemoryRegionSection */
354 addr -= section->offset_within_address_space;
355
356 /* Compute offset within MemoryRegion */
357 *xlat = addr + section->offset_within_region;
358
359 diff = int128_sub(section->mr->size, int128_make64(addr));
3752a036 360 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
149f54b5
PB
361 return section;
362}
90260c6c 363
a87f3954
PB
364static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
365{
366 if (memory_region_is_ram(mr)) {
367 return !(is_write && mr->readonly);
368 }
369 if (memory_region_is_romd(mr)) {
370 return !is_write;
371 }
372
373 return false;
374}
375
5c8a00ce
PB
/*
 * Translate @addr in address space @as to a MemoryRegion and an offset.
 *
 * The lookup runs under rcu_read_lock() and follows IOMMU regions until a
 * region without iommu_ops is reached.  On return *xlat holds the final
 * translated address and *plen has been clamped to the length usable at
 * that address.  &io_mem_unassigned is returned when an IOMMU entry does
 * not grant the permission bit for the requested access type.
 */
376MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
377 hwaddr *xlat, hwaddr *plen,
378 bool is_write)
90260c6c 379{
30951157
AK
380 IOMMUTLBEntry iotlb;
381 MemoryRegionSection *section;
382 MemoryRegion *mr;
30951157 383
79e2b9ae 384 rcu_read_lock();
30951157 385 for (;;) {
79e2b9ae
PB
/* Re-read the dispatch pointer each round: "as" may change below. */
386 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
387 section = address_space_translate_internal(d, addr, &addr, plen, true);
30951157
AK
388 mr = section->mr;
389
/* A region without IOMMU ops is the final target. */
390 if (!mr->iommu_ops) {
391 break;
392 }
393
8d7b8cb9 394 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
30951157
AK
/* Combine the translated bits with the untranslated bits under addr_mask. */
395 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
396 | (addr & iotlb.addr_mask));
/* Do not let the access run past the end of the IOMMU mapping. */
23820dbf 397 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
30951157
AK
/* Bit (1 << is_write) of perm must be set for this kind of access. */
398 if (!(iotlb.perm & (1 << is_write))) {
399 mr = &io_mem_unassigned;
400 break;
401 }
402
/* Continue the lookup in the address space the IOMMU points to. */
403 as = iotlb.target_as;
404 }
405
/* With Xen enabled, direct accesses are limited to the end of the page. */
fe680d0d 406 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
a87f3954 407 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
23820dbf 408 *plen = MIN(page, *plen);
a87f3954
PB
409 }
410
30951157 411 *xlat = addr;
79e2b9ae 412 rcu_read_unlock();
30951157 413 return mr;
90260c6c
JK
414}
415
79e2b9ae 416/* Called from RCU critical section */
90260c6c 417MemoryRegionSection *
9d82b5a7
PB
418address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
419 hwaddr *xlat, hwaddr *plen)
90260c6c 420{
30951157 421 MemoryRegionSection *section;
9d82b5a7
PB
422 section = address_space_translate_internal(cpu->memory_dispatch,
423 addr, xlat, plen, false);
30951157
AK
424
425 assert(!section->mr->iommu_ops);
426 return section;
90260c6c 427}
5b6dd868 428#endif
fd6ce8f6 429
b170fce3 430#if !defined(CONFIG_USER_ONLY)
5b6dd868
BS
431
432static int cpu_common_post_load(void *opaque, int version_id)
fd6ce8f6 433{
259186a7 434 CPUState *cpu = opaque;
a513fe19 435
5b6dd868
BS
436 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
437 version_id is increased. */
259186a7 438 cpu->interrupt_request &= ~0x01;
c01a71c1 439 tlb_flush(cpu, 1);
5b6dd868
BS
440
441 return 0;
a513fe19 442}
7501267e 443
6c3bff0e
PD
444static int cpu_common_pre_load(void *opaque)
445{
446 CPUState *cpu = opaque;
447
adee6424 448 cpu->exception_index = -1;
6c3bff0e
PD
449
450 return 0;
451}
452
453static bool cpu_common_exception_index_needed(void *opaque)
454{
455 CPUState *cpu = opaque;
456
adee6424 457 return tcg_enabled() && cpu->exception_index != -1;
6c3bff0e
PD
458}
459
/*
 * Optional "cpu_common/exception_index" subsection: carries the 32-bit
 * CPUState.exception_index field.
 */
460static const VMStateDescription vmstate_cpu_common_exception_index = {
461 .name = "cpu_common/exception_index",
462 .version_id = 1,
463 .minimum_version_id = 1,
464 .fields = (VMStateField[]) {
465 VMSTATE_INT32(exception_index, CPUState),
466 VMSTATE_END_OF_LIST()
467 }
468};
469
/*
 * "cpu_common" vmstate description: the halted and interrupt_request
 * fields of CPUState, with pre/post load hooks and the exception_index
 * subsection, which is included only when its .needed predicate says so.
 */
1a1562f5 470const VMStateDescription vmstate_cpu_common = {
5b6dd868
BS
471 .name = "cpu_common",
472 .version_id = 1,
473 .minimum_version_id = 1,
6c3bff0e 474 .pre_load = cpu_common_pre_load,
5b6dd868 475 .post_load = cpu_common_post_load,
35d08458 476 .fields = (VMStateField[]) {
259186a7
AF
477 VMSTATE_UINT32(halted, CPUState),
478 VMSTATE_UINT32(interrupt_request, CPUState),
5b6dd868 479 VMSTATE_END_OF_LIST()
6c3bff0e
PD
480 },
481 .subsections = (VMStateSubsection[]) {
482 {
483 .vmsd = &vmstate_cpu_common_exception_index,
484 .needed = cpu_common_exception_index_needed,
485 } , {
486 /* empty */
487 }
5b6dd868
BS
488 }
489};
1a1562f5 490
5b6dd868 491#endif
ea041c0e 492
38d8f5c8 493CPUState *qemu_get_cpu(int index)
ea041c0e 494{
bdc44640 495 CPUState *cpu;
ea041c0e 496
bdc44640 497 CPU_FOREACH(cpu) {
55e5c285 498 if (cpu->cpu_index == index) {
bdc44640 499 return cpu;
55e5c285 500 }
ea041c0e 501 }
5b6dd868 502
bdc44640 503 return NULL;
ea041c0e
FB
504}
505
09daed84
EI
506#if !defined(CONFIG_USER_ONLY)
507void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
508{
509 /* We only support one address space per cpu at the moment. */
510 assert(cpu->as == as);
511
512 if (cpu->tcg_as_listener) {
513 memory_listener_unregister(cpu->tcg_as_listener);
514 } else {
515 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
516 }
517 cpu->tcg_as_listener->commit = tcg_commit;
518 memory_listener_register(cpu->tcg_as_listener, as);
519}
520#endif
521
/*
 * Common initialisation for a newly created CPU: assign it the next
 * cpu_index, initialise its breakpoint/watchpoint lists, add it to the
 * global "cpus" list and register its migration state.
 */
5b6dd868 522void cpu_exec_init(CPUArchState *env)
ea041c0e 523{
5b6dd868 524 CPUState *cpu = ENV_GET_CPU(env);
b170fce3 525 CPUClass *cc = CPU_GET_CLASS(cpu);
bdc44640 526 CPUState *some_cpu;
5b6dd868
BS
527 int cpu_index;
528
/* User-only builds take the CPU list lock around the list update. */
529#if defined(CONFIG_USER_ONLY)
530 cpu_list_lock();
531#endif
/* The new index is the number of CPUs already on the list. */
5b6dd868 532 cpu_index = 0;
bdc44640 533 CPU_FOREACH(some_cpu) {
5b6dd868
BS
534 cpu_index++;
535 }
55e5c285 536 cpu->cpu_index = cpu_index;
1b1ed8dc 537 cpu->numa_node = 0;
f0c3c505 538 QTAILQ_INIT(&cpu->breakpoints);
ff4700b0 539 QTAILQ_INIT(&cpu->watchpoints);
5b6dd868 540#ifndef CONFIG_USER_ONLY
09daed84 541 cpu->as = &address_space_memory;
5b6dd868 542 cpu->thread_id = qemu_get_thread_id();
cba70549 543 cpu_reload_memory_map(cpu);
5b6dd868 544#endif
bdc44640 545 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
5b6dd868
BS
546#if defined(CONFIG_USER_ONLY)
547 cpu_list_unlock();
548#endif
e0d47944
AF
/* Register the common state only when the device has no vmsd of its own. */
549 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
550 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
551 }
/* Targets defining CPU_SAVE_VERSION register cpu_save/cpu_load and must not also provide a vmsd. */
5b6dd868 552#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
5b6dd868
BS
553 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
554 cpu_save, cpu_load, env);
b170fce3 555 assert(cc->vmsd == NULL);
e0d47944 556 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
5b6dd868 557#endif
b170fce3
AF
558 if (cc->vmsd != NULL) {
559 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
560 }
ea041c0e
FB
561}
562
94df27fd 563#if defined(CONFIG_USER_ONLY)
00b941e5 564static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
94df27fd
PB
565{
566 tb_invalidate_phys_page_range(pc, pc + 1, 0);
567}
568#else
00b941e5 569static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
1e7855a5 570{
e8262a1b
MF
571 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
572 if (phys != -1) {
09daed84 573 tb_invalidate_phys_addr(cpu->as,
29d8ec7b 574 phys | (pc & ~TARGET_PAGE_MASK));
e8262a1b 575 }
1e7855a5 576}
c27004ec 577#endif
d720b93d 578
c527ee8f 579#if defined(CONFIG_USER_ONLY)
75a34036 580void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
c527ee8f
PB
581
582{
583}
584
3ee887e8
PM
585int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
586 int flags)
587{
588 return -ENOSYS;
589}
590
591void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
592{
593}
594
75a34036 595int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
c527ee8f
PB
596 int flags, CPUWatchpoint **watchpoint)
597{
598 return -ENOSYS;
599}
600#else
6658ffb8 601/* Add a watchpoint. */
75a34036 602int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
a1d1bb31 603 int flags, CPUWatchpoint **watchpoint)
6658ffb8 604{
c0ce998e 605 CPUWatchpoint *wp;
6658ffb8 606
05068c0d 607 /* forbid ranges which are empty or run off the end of the address space */
07e2863d 608 if (len == 0 || (addr + len - 1) < addr) {
75a34036
AF
609 error_report("tried to set invalid watchpoint at %"
610 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
b4051334
AL
611 return -EINVAL;
612 }
7267c094 613 wp = g_malloc(sizeof(*wp));
a1d1bb31
AL
614
615 wp->vaddr = addr;
05068c0d 616 wp->len = len;
a1d1bb31
AL
617 wp->flags = flags;
618
2dc9f411 619 /* keep all GDB-injected watchpoints in front */
ff4700b0
AF
620 if (flags & BP_GDB) {
621 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
622 } else {
623 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
624 }
6658ffb8 625
31b030d4 626 tlb_flush_page(cpu, addr);
a1d1bb31
AL
627
628 if (watchpoint)
629 *watchpoint = wp;
630 return 0;
6658ffb8
PB
631}
632
a1d1bb31 633/* Remove a specific watchpoint. */
75a34036 634int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
a1d1bb31 635 int flags)
6658ffb8 636{
a1d1bb31 637 CPUWatchpoint *wp;
6658ffb8 638
ff4700b0 639 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d 640 if (addr == wp->vaddr && len == wp->len
6e140f28 641 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
75a34036 642 cpu_watchpoint_remove_by_ref(cpu, wp);
6658ffb8
PB
643 return 0;
644 }
645 }
a1d1bb31 646 return -ENOENT;
6658ffb8
PB
647}
648
a1d1bb31 649/* Remove a specific watchpoint by reference. */
75a34036 650void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
a1d1bb31 651{
ff4700b0 652 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
7d03f82f 653
31b030d4 654 tlb_flush_page(cpu, watchpoint->vaddr);
a1d1bb31 655
7267c094 656 g_free(watchpoint);
a1d1bb31
AL
657}
658
659/* Remove all matching watchpoints. */
75a34036 660void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
a1d1bb31 661{
c0ce998e 662 CPUWatchpoint *wp, *next;
a1d1bb31 663
ff4700b0 664 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
75a34036
AF
665 if (wp->flags & mask) {
666 cpu_watchpoint_remove_by_ref(cpu, wp);
667 }
c0ce998e 668 }
7d03f82f 669}
05068c0d
PM
670
671/* Return true if this watchpoint address matches the specified
672 * access (ie the address range covered by the watchpoint overlaps
673 * partially or completely with the address range covered by the
674 * access).
675 */
676static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
677 vaddr addr,
678 vaddr len)
679{
680 /* We know the lengths are non-zero, but a little caution is
681 * required to avoid errors in the case where the range ends
682 * exactly at the top of the address space and so addr + len
683 * wraps round to zero.
684 */
685 vaddr wpend = wp->vaddr + wp->len - 1;
686 vaddr addrend = addr + len - 1;
687
688 return !(addr > wpend || wp->vaddr > addrend);
689}
690
c527ee8f 691#endif
7d03f82f 692
a1d1bb31 693/* Add a breakpoint. */
b3310ab3 694int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
a1d1bb31 695 CPUBreakpoint **breakpoint)
4c3a88a2 696{
c0ce998e 697 CPUBreakpoint *bp;
3b46e624 698
7267c094 699 bp = g_malloc(sizeof(*bp));
4c3a88a2 700
a1d1bb31
AL
701 bp->pc = pc;
702 bp->flags = flags;
703
2dc9f411 704 /* keep all GDB-injected breakpoints in front */
00b941e5 705 if (flags & BP_GDB) {
f0c3c505 706 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
00b941e5 707 } else {
f0c3c505 708 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
00b941e5 709 }
3b46e624 710
f0c3c505 711 breakpoint_invalidate(cpu, pc);
a1d1bb31 712
00b941e5 713 if (breakpoint) {
a1d1bb31 714 *breakpoint = bp;
00b941e5 715 }
4c3a88a2 716 return 0;
4c3a88a2
FB
717}
718
a1d1bb31 719/* Remove a specific breakpoint. */
b3310ab3 720int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
a1d1bb31 721{
a1d1bb31
AL
722 CPUBreakpoint *bp;
723
f0c3c505 724 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
a1d1bb31 725 if (bp->pc == pc && bp->flags == flags) {
b3310ab3 726 cpu_breakpoint_remove_by_ref(cpu, bp);
a1d1bb31
AL
727 return 0;
728 }
7d03f82f 729 }
a1d1bb31 730 return -ENOENT;
7d03f82f
EI
731}
732
a1d1bb31 733/* Remove a specific breakpoint by reference. */
b3310ab3 734void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
4c3a88a2 735{
f0c3c505
AF
736 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
737
738 breakpoint_invalidate(cpu, breakpoint->pc);
a1d1bb31 739
7267c094 740 g_free(breakpoint);
a1d1bb31
AL
741}
742
743/* Remove all matching breakpoints. */
b3310ab3 744void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
a1d1bb31 745{
c0ce998e 746 CPUBreakpoint *bp, *next;
a1d1bb31 747
f0c3c505 748 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
b3310ab3
AF
749 if (bp->flags & mask) {
750 cpu_breakpoint_remove_by_ref(cpu, bp);
751 }
c0ce998e 752 }
4c3a88a2
FB
753}
754
c33a346e
FB
755/* enable or disable single step mode. EXCP_DEBUG is returned by the
756 CPU loop after each instruction */
3825b28f 757void cpu_single_step(CPUState *cpu, int enabled)
c33a346e 758{
ed2803da
AF
759 if (cpu->singlestep_enabled != enabled) {
760 cpu->singlestep_enabled = enabled;
761 if (kvm_enabled()) {
38e478ec 762 kvm_update_guest_debug(cpu, 0);
ed2803da 763 } else {
ccbb4d44 764 /* must flush all the translated code to avoid inconsistencies */
e22a25c9 765 /* XXX: only flush what is necessary */
38e478ec 766 CPUArchState *env = cpu->env_ptr;
e22a25c9
AL
767 tb_flush(env);
768 }
c33a346e 769 }
c33a346e
FB
770}
771
a47dddd7 772void cpu_abort(CPUState *cpu, const char *fmt, ...)
7501267e
FB
773{
774 va_list ap;
493ae1f0 775 va_list ap2;
7501267e
FB
776
777 va_start(ap, fmt);
493ae1f0 778 va_copy(ap2, ap);
7501267e
FB
779 fprintf(stderr, "qemu: fatal: ");
780 vfprintf(stderr, fmt, ap);
781 fprintf(stderr, "\n");
878096ee 782 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
93fcfe39
AL
783 if (qemu_log_enabled()) {
784 qemu_log("qemu: fatal: ");
785 qemu_log_vprintf(fmt, ap2);
786 qemu_log("\n");
a0762859 787 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
31b1a7b4 788 qemu_log_flush();
93fcfe39 789 qemu_log_close();
924edcae 790 }
493ae1f0 791 va_end(ap2);
f9373291 792 va_end(ap);
fd052bf6
RV
793#if defined(CONFIG_USER_ONLY)
794 {
795 struct sigaction act;
796 sigfillset(&act.sa_mask);
797 act.sa_handler = SIG_DFL;
798 sigaction(SIGABRT, &act, NULL);
799 }
800#endif
7501267e
FB
801 abort();
802}
803
0124311e 804#if !defined(CONFIG_USER_ONLY)
0dc3f44a 805/* Called from RCU critical section */
041603fe
PB
806static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
807{
808 RAMBlock *block;
809
43771539 810 block = atomic_rcu_read(&ram_list.mru_block);
9b8424d5 811 if (block && addr - block->offset < block->max_length) {
041603fe
PB
812 goto found;
813 }
0dc3f44a 814 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5 815 if (addr - block->offset < block->max_length) {
041603fe
PB
816 goto found;
817 }
818 }
819
820 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
821 abort();
822
823found:
43771539
PB
824 /* It is safe to write mru_block outside the iothread lock. This
825 * is what happens:
826 *
827 * mru_block = xxx
828 * rcu_read_unlock()
829 * xxx removed from list
830 * rcu_read_lock()
831 * read mru_block
832 * mru_block = NULL;
833 * call_rcu(reclaim_ramblock, xxx);
834 * rcu_read_unlock()
835 *
836 * atomic_rcu_set is not needed here. The block was already published
837 * when it was placed into the list. Here we're just making an extra
838 * copy of the pointer.
839 */
041603fe
PB
840 ram_list.mru_block = block;
841 return block;
842}
843
a2f4d5be 844static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
d24981d3 845{
041603fe 846 ram_addr_t start1;
a2f4d5be
JQ
847 RAMBlock *block;
848 ram_addr_t end;
849
850 end = TARGET_PAGE_ALIGN(start + length);
851 start &= TARGET_PAGE_MASK;
d24981d3 852
0dc3f44a 853 rcu_read_lock();
041603fe
PB
854 block = qemu_get_ram_block(start);
855 assert(block == qemu_get_ram_block(end - 1));
1240be24 856 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
041603fe 857 cpu_tlb_reset_dirty_all(start1, length);
0dc3f44a 858 rcu_read_unlock();
d24981d3
JQ
859}
860
5579c7f3 861/* Note: start and end must be within the same ram block. */
a2f4d5be 862void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
52159192 863 unsigned client)
1ccde1cb 864{
1ccde1cb
FB
865 if (length == 0)
866 return;
c8d6f66a 867 cpu_physical_memory_clear_dirty_range_type(start, length, client);
f23db169 868
d24981d3 869 if (tcg_enabled()) {
a2f4d5be 870 tlb_reset_dirty_range_all(start, length);
5579c7f3 871 }
1ccde1cb
FB
872}
873
/* Record in the file-level in_migration flag whether dirty tracking is enabled. */
981fdf23 874static void cpu_physical_memory_set_dirty_tracking(bool enable)
74576198
AL
875{
876 in_migration = enable;
74576198
AL
877}
878
/*
 * Compute the iotlb value for a page of @section.
 *
 * For RAM the value is the page-aligned ram address plus @xlat, with
 * PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM or-ed in depending on
 * section->readonly.  For anything else it is the section's index in the
 * dispatch sections array plus @xlat.  If a watchpoint overlaps the page
 * at @vaddr and applies to this mapping, the value is replaced by
 * PHYS_SECTION_WATCH + @paddr and TLB_MMIO is set in *address.
 */
79e2b9ae 879/* Called from RCU critical section */
bb0e627a 880hwaddr memory_region_section_get_iotlb(CPUState *cpu,
149f54b5
PB
881 MemoryRegionSection *section,
882 target_ulong vaddr,
883 hwaddr paddr, hwaddr xlat,
884 int prot,
885 target_ulong *address)
e5548617 886{
a8170e5e 887 hwaddr iotlb;
e5548617
BS
888 CPUWatchpoint *wp;
889
cc5bea60 890 if (memory_region_is_ram(section->mr)) {
e5548617
BS
891 /* Normal RAM. */
892 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
149f54b5 893 + xlat;
e5548617 894 if (!section->readonly) {
b41aac4f 895 iotlb |= PHYS_SECTION_NOTDIRTY;
e5548617 896 } else {
b41aac4f 897 iotlb |= PHYS_SECTION_ROM;
e5548617
BS
898 }
899 } else {
/* Pointer difference: the section's index within the sections array. */
1b3fb98f 900 iotlb = section - section->address_space->dispatch->map.sections;
149f54b5 901 iotlb += xlat;
e5548617
BS
902 }
903
904 /* Make accesses to pages with watchpoints go via the
905 watchpoint trap routines. */
ff4700b0 906 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d 907 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
e5548617
BS
908 /* Avoid trapping reads of pages with a write breakpoint. */
909 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
b41aac4f 910 iotlb = PHYS_SECTION_WATCH + paddr;
e5548617
BS
911 *address |= TLB_MMIO;
/* The first applicable watchpoint decides; later ones are not examined. */
912 break;
913 }
914 }
915 }
916
917 return iotlb;
918}
9fa3e853
FB
919#endif /* !defined(CONFIG_USER_ONLY) */
920
e2eef170 921#if !defined(CONFIG_USER_ONLY)
8da3ff18 922
c227f099 923static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
5312bd8b 924 uint16_t section);
acc9d80b 925static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
54688b1e 926
a2b257d6
IM
927static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
928 qemu_anon_ram_alloc;
91138037
MA
929
930/*
931 * Set a custom physical guest memory allocator.
932 * Accelerators with unusual needs may need this. Hopefully, we can
933 * get rid of it eventually.
934 */
/* Replaces the file-level phys_mem_alloc function pointer with @alloc. */
a2b257d6 935void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
91138037
MA
936{
937 phys_mem_alloc = alloc;
938}
939
53cb28cb
MA
940static uint16_t phys_section_add(PhysPageMap *map,
941 MemoryRegionSection *section)
5312bd8b 942{
68f3f65b
PB
943 /* The physical section number is ORed with a page-aligned
944 * pointer to produce the iotlb entries. Thus it should
945 * never overflow into the page-aligned value.
946 */
53cb28cb 947 assert(map->sections_nb < TARGET_PAGE_SIZE);
68f3f65b 948
53cb28cb
MA
949 if (map->sections_nb == map->sections_nb_alloc) {
950 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
951 map->sections = g_renew(MemoryRegionSection, map->sections,
952 map->sections_nb_alloc);
5312bd8b 953 }
53cb28cb 954 map->sections[map->sections_nb] = *section;
dfde4e6e 955 memory_region_ref(section->mr);
53cb28cb 956 return map->sections_nb++;
5312bd8b
AK
957}
958
058bc4b5
PB
959static void phys_section_destroy(MemoryRegion *mr)
960{
dfde4e6e
PB
961 memory_region_unref(mr);
962
058bc4b5
PB
963 if (mr->subpage) {
964 subpage_t *subpage = container_of(mr, subpage_t, iomem);
b4fefef9 965 object_unref(OBJECT(&subpage->iomem));
058bc4b5
PB
966 g_free(subpage);
967 }
968}
969
6092666e 970static void phys_sections_free(PhysPageMap *map)
5312bd8b 971{
9affd6fc
PB
972 while (map->sections_nb > 0) {
973 MemoryRegionSection *section = &map->sections[--map->sections_nb];
058bc4b5
PB
974 phys_section_destroy(section->mr);
975 }
9affd6fc
PB
976 g_free(map->sections);
977 g_free(map->nodes);
5312bd8b
AK
978}
979
/*
 * Register @section, which covers only part of a page, in dispatch @d.
 *
 * The page containing the section is looked up.  If it is still
 * unassigned, a subpage container is created and mapped for that one
 * page; otherwise the existing subpage is reused.  The byte range of
 * @section inside the page is then pointed at a newly added section.
 */
ac1970fb 980static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
0f0cb164
AK
981{
982 subpage_t *subpage;
/* Page-aligned address of the page that contains the section. */
a8170e5e 983 hwaddr base = section->offset_within_address_space
0f0cb164 984 & TARGET_PAGE_MASK;
97115a8d 985 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
53cb28cb 986 d->map.nodes, d->map.sections);
0f0cb164
AK
/* Template section describing the whole page; used only when a new subpage is created. */
987 MemoryRegionSection subsection = {
988 .offset_within_address_space = base,
052e87b0 989 .size = int128_make64(TARGET_PAGE_SIZE),
0f0cb164 990 };
a8170e5e 991 hwaddr start, end;
0f0cb164 992
/* The page must be either already a subpage or not mapped at all. */
f3705d53 993 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
0f0cb164 994
f3705d53 995 if (!(existing->mr->subpage)) {
acc9d80b 996 subpage = subpage_init(d->as, base);
3be91e86 997 subsection.address_space = d->as;
0f0cb164 998 subsection.mr = &subpage->iomem;
ac1970fb 999 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
53cb28cb 1000 phys_section_add(&d->map, &subsection));
0f0cb164 1001 } else {
f3705d53 1002 subpage = container_of(existing->mr, subpage_t, iomem);
0f0cb164
AK
1003 }
/* start and end are inclusive byte offsets within the page. */
1004 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
052e87b0 1005 end = start + int128_get64(section->size) - 1;
53cb28cb
MA
1006 subpage_register(subpage, start, end,
1007 phys_section_add(&d->map, section));
0f0cb164
AK
1008}
1009
1010
052e87b0
PB
1011static void register_multipage(AddressSpaceDispatch *d,
1012 MemoryRegionSection *section)
33417e70 1013{
a8170e5e 1014 hwaddr start_addr = section->offset_within_address_space;
53cb28cb 1015 uint16_t section_index = phys_section_add(&d->map, section);
052e87b0
PB
1016 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1017 TARGET_PAGE_BITS));
dd81124b 1018
733d5ef5
PB
1019 assert(num_pages);
1020 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
33417e70
FB
1021}
1022
/*
 * Add @section to the dispatch under construction (as->next_dispatch) of
 * the AddressSpace that embeds @listener as its dispatch_listener.
 *
 * The section is cut into pieces: parts that do not start on a page
 * boundary, or that are shorter than a page, go through register_subpage();
 * page-aligned runs of whole pages go through register_multipage().
 */
ac1970fb 1023static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
0f0cb164 1024{
89ae337a 1025 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
00752703 1026 AddressSpaceDispatch *d = as->next_dispatch;
99b9cc06 1027 MemoryRegionSection now = *section, remain = *section;
052e87b0 1028 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
0f0cb164 1029
/* Unaligned head: register the bytes up to the next page boundary (or the
 * whole section if it ends earlier) as a subpage.  Otherwise start with an
 * empty "now" so the loop below handles everything.
 */
733d5ef5
PB
1030 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1031 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1032 - now.offset_within_address_space;
1033
052e87b0 1034 now.size = int128_min(int128_make64(left), now.size);
ac1970fb 1035 register_subpage(d, &now);
733d5ef5 1036 } else {
052e87b0 1037 now.size = int128_zero();
733d5ef5 1038 }
/* Each iteration drops the piece just registered ("now") from "remain"
 * and registers the next piece, until the last piece equals the remainder.
 */
052e87b0
PB
1039 while (int128_ne(remain.size, now.size)) {
1040 remain.size = int128_sub(remain.size, now.size);
1041 remain.offset_within_address_space += int128_get64(now.size);
1042 remain.offset_within_region += int128_get64(now.size);
69b67646 1043 now = remain;
052e87b0 1044 if (int128_lt(remain.size, page_size)) {
733d5ef5 1045 register_subpage(d, &now);
88266249 1046 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
052e87b0 1047 now.size = page_size;
ac1970fb 1048 register_subpage(d, &now);
69b67646 1049 } else {
/* Aligned start with at least one page left: round the size down to a
 * multiple of the page size and register it in one go.
 */
052e87b0 1050 now.size = int128_and(now.size, int128_neg(page_size));
ac1970fb 1051 register_multipage(d, &now);
69b67646 1052 }
0f0cb164
AK
1053 }
1054}
1055
/* Flush the coalesced MMIO buffer; a no-op unless KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
1061
b2a8658e
UD
1062void qemu_mutex_lock_ramlist(void)
1063{
1064 qemu_mutex_lock(&ram_list.mutex);
1065}
1066
1067void qemu_mutex_unlock_ramlist(void)
1068{
1069 qemu_mutex_unlock(&ram_list.mutex);
1070}
1071
e1e84ba0 1072#ifdef __linux__
c902760f
MT
1073
1074#include <sys/vfs.h>
1075
1076#define HUGETLBFS_MAGIC 0x958458f6
1077
fc7a5800 1078static long gethugepagesize(const char *path, Error **errp)
c902760f
MT
1079{
1080 struct statfs fs;
1081 int ret;
1082
1083 do {
9742bf26 1084 ret = statfs(path, &fs);
c902760f
MT
1085 } while (ret != 0 && errno == EINTR);
1086
1087 if (ret != 0) {
fc7a5800
HT
1088 error_setg_errno(errp, errno, "failed to get page size of file %s",
1089 path);
9742bf26 1090 return 0;
c902760f
MT
1091 }
1092
1093 if (fs.f_type != HUGETLBFS_MAGIC)
9742bf26 1094 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
c902760f
MT
1095
1096 return fs.f_bsize;
1097}
1098
04b16653
AW
1099static void *file_ram_alloc(RAMBlock *block,
1100 ram_addr_t memory,
7f56e740
PB
1101 const char *path,
1102 Error **errp)
c902760f
MT
1103{
1104 char *filename;
8ca761f6
PF
1105 char *sanitized_name;
1106 char *c;
557529dd 1107 void *area = NULL;
c902760f 1108 int fd;
557529dd 1109 uint64_t hpagesize;
fc7a5800 1110 Error *local_err = NULL;
c902760f 1111
fc7a5800
HT
1112 hpagesize = gethugepagesize(path, &local_err);
1113 if (local_err) {
1114 error_propagate(errp, local_err);
f9a49dfa 1115 goto error;
c902760f 1116 }
a2b257d6 1117 block->mr->align = hpagesize;
c902760f
MT
1118
1119 if (memory < hpagesize) {
557529dd
HT
1120 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1121 "or larger than huge page size 0x%" PRIx64,
1122 memory, hpagesize);
1123 goto error;
c902760f
MT
1124 }
1125
1126 if (kvm_enabled() && !kvm_has_sync_mmu()) {
7f56e740
PB
1127 error_setg(errp,
1128 "host lacks kvm mmu notifiers, -mem-path unsupported");
f9a49dfa 1129 goto error;
c902760f
MT
1130 }
1131
8ca761f6 1132 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
83234bf2 1133 sanitized_name = g_strdup(memory_region_name(block->mr));
8ca761f6
PF
1134 for (c = sanitized_name; *c != '\0'; c++) {
1135 if (*c == '/')
1136 *c = '_';
1137 }
1138
1139 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1140 sanitized_name);
1141 g_free(sanitized_name);
c902760f
MT
1142
1143 fd = mkstemp(filename);
1144 if (fd < 0) {
7f56e740
PB
1145 error_setg_errno(errp, errno,
1146 "unable to create backing store for hugepages");
e4ada482 1147 g_free(filename);
f9a49dfa 1148 goto error;
c902760f
MT
1149 }
1150 unlink(filename);
e4ada482 1151 g_free(filename);
c902760f
MT
1152
1153 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1154
1155 /*
1156 * ftruncate is not supported by hugetlbfs in older
1157 * hosts, so don't bother bailing out on errors.
1158 * If anything goes wrong with it under other filesystems,
1159 * mmap will fail.
1160 */
7f56e740 1161 if (ftruncate(fd, memory)) {
9742bf26 1162 perror("ftruncate");
7f56e740 1163 }
c902760f 1164
dbcb8981
PB
1165 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1166 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1167 fd, 0);
c902760f 1168 if (area == MAP_FAILED) {
7f56e740
PB
1169 error_setg_errno(errp, errno,
1170 "unable to map backing store for hugepages");
9742bf26 1171 close(fd);
f9a49dfa 1172 goto error;
c902760f 1173 }
ef36fa14
MT
1174
1175 if (mem_prealloc) {
38183310 1176 os_mem_prealloc(fd, area, memory);
ef36fa14
MT
1177 }
1178
04b16653 1179 block->fd = fd;
c902760f 1180 return area;
f9a49dfa
MT
1181
1182error:
1183 if (mem_prealloc) {
81b07353 1184 error_report("%s", error_get_pretty(*errp));
f9a49dfa
MT
1185 exit(1);
1186 }
1187 return NULL;
c902760f
MT
1188}
1189#endif
1190
0dc3f44a 1191/* Called with the ramlist lock held. */
d17b5288 1192static ram_addr_t find_ram_offset(ram_addr_t size)
04b16653
AW
1193{
1194 RAMBlock *block, *next_block;
3e837b2c 1195 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
04b16653 1196
49cd9ac6
SH
1197 assert(size != 0); /* it would hand out same offset multiple times */
1198
0dc3f44a 1199 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
04b16653 1200 return 0;
0d53d9fe 1201 }
04b16653 1202
0dc3f44a 1203 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
f15fbc4b 1204 ram_addr_t end, next = RAM_ADDR_MAX;
04b16653 1205
62be4e3a 1206 end = block->offset + block->max_length;
04b16653 1207
0dc3f44a 1208 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
04b16653
AW
1209 if (next_block->offset >= end) {
1210 next = MIN(next, next_block->offset);
1211 }
1212 }
1213 if (next - end >= size && next - end < mingap) {
3e837b2c 1214 offset = end;
04b16653
AW
1215 mingap = next - end;
1216 }
1217 }
3e837b2c
AW
1218
1219 if (offset == RAM_ADDR_MAX) {
1220 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1221 (uint64_t)size);
1222 abort();
1223 }
1224
04b16653
AW
1225 return offset;
1226}
1227
652d7ec2 1228ram_addr_t last_ram_offset(void)
d17b5288
AW
1229{
1230 RAMBlock *block;
1231 ram_addr_t last = 0;
1232
0dc3f44a
MD
1233 rcu_read_lock();
1234 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
62be4e3a 1235 last = MAX(last, block->offset + block->max_length);
0d53d9fe 1236 }
0dc3f44a 1237 rcu_read_unlock();
d17b5288
AW
1238 return last;
1239}
1240
ddb97f1d
JB
1241static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1242{
1243 int ret;
ddb97f1d
JB
1244
1245 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
47c8ca53 1246 if (!machine_dump_guest_core(current_machine)) {
ddb97f1d
JB
1247 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1248 if (ret) {
1249 perror("qemu_madvise");
1250 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1251 "but dump_guest_core=off specified\n");
1252 }
1253 }
1254}
1255
0dc3f44a
MD
1256/* Called within an RCU critical section, or while the ramlist lock
1257 * is held.
1258 */
20cfe881 1259static RAMBlock *find_ram_block(ram_addr_t addr)
84b89d78 1260{
20cfe881 1261 RAMBlock *block;
84b89d78 1262
0dc3f44a 1263 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
c5705a77 1264 if (block->offset == addr) {
20cfe881 1265 return block;
c5705a77
AK
1266 }
1267 }
20cfe881
HT
1268
1269 return NULL;
1270}
1271
ae3a7047 1272/* Called with iothread lock held. */
20cfe881
HT
1273void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1274{
ae3a7047 1275 RAMBlock *new_block, *block;
20cfe881 1276
0dc3f44a 1277 rcu_read_lock();
ae3a7047 1278 new_block = find_ram_block(addr);
c5705a77
AK
1279 assert(new_block);
1280 assert(!new_block->idstr[0]);
84b89d78 1281
09e5ab63
AL
1282 if (dev) {
1283 char *id = qdev_get_dev_path(dev);
84b89d78
CM
1284 if (id) {
1285 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
7267c094 1286 g_free(id);
84b89d78
CM
1287 }
1288 }
1289 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1290
0dc3f44a 1291 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
c5705a77 1292 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
84b89d78
CM
1293 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1294 new_block->idstr);
1295 abort();
1296 }
1297 }
0dc3f44a 1298 rcu_read_unlock();
c5705a77
AK
1299}
1300
ae3a7047 1301/* Called with iothread lock held. */
20cfe881
HT
1302void qemu_ram_unset_idstr(ram_addr_t addr)
1303{
ae3a7047 1304 RAMBlock *block;
20cfe881 1305
ae3a7047
MD
1306 /* FIXME: arch_init.c assumes that this is not called throughout
1307 * migration. Ignore the problem since hot-unplug during migration
1308 * does not work anyway.
1309 */
1310
0dc3f44a 1311 rcu_read_lock();
ae3a7047 1312 block = find_ram_block(addr);
20cfe881
HT
1313 if (block) {
1314 memset(block->idstr, 0, sizeof(block->idstr));
1315 }
0dc3f44a 1316 rcu_read_unlock();
20cfe881
HT
1317}
1318
8490fc78
LC
1319static int memory_try_enable_merging(void *addr, size_t len)
1320{
75cc7f01 1321 if (!machine_mem_merge(current_machine)) {
8490fc78
LC
1322 /* disabled by the user */
1323 return 0;
1324 }
1325
1326 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1327}
1328
62be4e3a
MT
1329/* Only legal before guest might have detected the memory size: e.g. on
1330 * incoming migration, or right after reset.
1331 *
1332 * As memory core doesn't know how is memory accessed, it is up to
1333 * resize callback to update device state and/or add assertions to detect
1334 * misuse, if necessary.
1335 */
1336int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1337{
1338 RAMBlock *block = find_ram_block(base);
1339
1340 assert(block);
1341
129ddaf3
MT
1342 newsize = TARGET_PAGE_ALIGN(newsize);
1343
62be4e3a
MT
1344 if (block->used_length == newsize) {
1345 return 0;
1346 }
1347
1348 if (!(block->flags & RAM_RESIZEABLE)) {
1349 error_setg_errno(errp, EINVAL,
1350 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1351 " in != 0x" RAM_ADDR_FMT, block->idstr,
1352 newsize, block->used_length);
1353 return -EINVAL;
1354 }
1355
1356 if (block->max_length < newsize) {
1357 error_setg_errno(errp, EINVAL,
1358 "Length too large: %s: 0x" RAM_ADDR_FMT
1359 " > 0x" RAM_ADDR_FMT, block->idstr,
1360 newsize, block->max_length);
1361 return -EINVAL;
1362 }
1363
1364 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1365 block->used_length = newsize;
1366 cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
1367 memory_region_set_size(block->mr, newsize);
1368 if (block->resized) {
1369 block->resized(block->idstr, newsize, block->host);
1370 }
1371 return 0;
1372}
1373
/*
 * Insert @new_block into ram_list and return the offset assigned to it.
 *
 * If the block has no host pointer yet, memory is obtained through
 * xen_ram_alloc() under Xen, or through the phys_mem_alloc hook otherwise;
 * if the hook returns NULL, @errp is set and -1 is returned.  The
 * caller keeps ownership of @new_block in that case.
 */
ef701d7b 1374static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
c5705a77 1375{
e1c57ab8 1376 RAMBlock *block;
0d53d9fe 1377 RAMBlock *last_block = NULL;
2152f5ca
JQ
1378 ram_addr_t old_ram_size, new_ram_size;
1379
/* Sampled before the insertion so the dirty bitmaps can be grown below. */
1380 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
c5705a77 1381
b2a8658e 1382 qemu_mutex_lock_ramlist();
9b8424d5 1383 new_block->offset = find_ram_offset(new_block->max_length);
e1c57ab8
PB
1384
1385 if (!new_block->host) {
1386 if (xen_enabled()) {
9b8424d5
MT
1387 xen_ram_alloc(new_block->offset, new_block->max_length,
1388 new_block->mr);
e1c57ab8 1389 } else {
9b8424d5 1390 new_block->host = phys_mem_alloc(new_block->max_length,
a2b257d6 1391 &new_block->mr->align);
39228250 1392 if (!new_block->host) {
ef701d7b
HT
1393 error_setg_errno(errp, errno,
1394 "cannot set up guest memory '%s'",
1395 memory_region_name(new_block->mr));
1396 qemu_mutex_unlock_ramlist();
1397 return -1;
39228250 1398 }
9b8424d5 1399 memory_try_enable_merging(new_block->host, new_block->max_length);
6977dfe6 1400 }
c902760f 1401 }
94a6b54f 1402
0d53d9fe
MD
1403 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1404 * QLIST (which has an RCU-friendly variant) does not have insertion at
1405 * tail, so save the last element in last_block.
1406 */
0dc3f44a 1407 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
0d53d9fe 1408 last_block = block;
9b8424d5 1409 if (block->max_length < new_block->max_length) {
abb26d63
PB
1410 break;
1411 }
1412 }
/* block is non-NULL only when the loop stopped early at a smaller block. */
1413 if (block) {
0dc3f44a 1414 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
0d53d9fe 1415 } else if (last_block) {
0dc3f44a 1416 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
0d53d9fe 1417 } else { /* list is empty */
0dc3f44a 1418 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
abb26d63 1419 }
0d6d3c87 1420 ram_list.mru_block = NULL;
94a6b54f 1421
0dc3f44a
MD
1422 /* Write list before version */
1423 smp_wmb();
f798b07f 1424 ram_list.version++;
b2a8658e 1425 qemu_mutex_unlock_ramlist();
f798b07f 1426
2152f5ca
JQ
1427 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1428
/* Grow every dirty bitmap, zero-filled, if the RAM address space got larger. */
1429 if (new_ram_size > old_ram_size) {
1ab4c8ce 1430 int i;
ae3a7047
MD
1431
1432 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1ab4c8ce
JQ
1433 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1434 ram_list.dirty_memory[i] =
1435 bitmap_zero_extend(ram_list.dirty_memory[i],
1436 old_ram_size, new_ram_size);
1437 }
2152f5ca 1438 }
9b8424d5
MT
1439 cpu_physical_memory_set_dirty_range(new_block->offset,
1440 new_block->used_length);
94a6b54f 1441
/* Host-side hints only apply when the block has a host mapping. */
a904c911
PB
1442 if (new_block->host) {
1443 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1444 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1445 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1446 if (kvm_enabled()) {
1447 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1448 }
e1c57ab8 1449 }
6f0437e8 1450
94a6b54f
PB
1451 return new_block->offset;
1452}
e9a1ab19 1453
0b183fc8 1454#ifdef __linux__
e1c57ab8 1455ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
dbcb8981 1456 bool share, const char *mem_path,
7f56e740 1457 Error **errp)
e1c57ab8
PB
1458{
1459 RAMBlock *new_block;
ef701d7b
HT
1460 ram_addr_t addr;
1461 Error *local_err = NULL;
e1c57ab8
PB
1462
1463 if (xen_enabled()) {
7f56e740
PB
1464 error_setg(errp, "-mem-path not supported with Xen");
1465 return -1;
e1c57ab8
PB
1466 }
1467
1468 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1469 /*
1470 * file_ram_alloc() needs to allocate just like
1471 * phys_mem_alloc, but we haven't bothered to provide
1472 * a hook there.
1473 */
7f56e740
PB
1474 error_setg(errp,
1475 "-mem-path not supported with this accelerator");
1476 return -1;
e1c57ab8
PB
1477 }
1478
1479 size = TARGET_PAGE_ALIGN(size);
1480 new_block = g_malloc0(sizeof(*new_block));
1481 new_block->mr = mr;
9b8424d5
MT
1482 new_block->used_length = size;
1483 new_block->max_length = size;
dbcb8981 1484 new_block->flags = share ? RAM_SHARED : 0;
7f56e740
PB
1485 new_block->host = file_ram_alloc(new_block, size,
1486 mem_path, errp);
1487 if (!new_block->host) {
1488 g_free(new_block);
1489 return -1;
1490 }
1491
ef701d7b
HT
1492 addr = ram_block_add(new_block, &local_err);
1493 if (local_err) {
1494 g_free(new_block);
1495 error_propagate(errp, local_err);
1496 return -1;
1497 }
1498 return addr;
e1c57ab8 1499}
0b183fc8 1500#endif
e1c57ab8 1501
62be4e3a
MT
1502static
1503ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1504 void (*resized)(const char*,
1505 uint64_t length,
1506 void *host),
1507 void *host, bool resizeable,
ef701d7b 1508 MemoryRegion *mr, Error **errp)
e1c57ab8
PB
1509{
1510 RAMBlock *new_block;
ef701d7b
HT
1511 ram_addr_t addr;
1512 Error *local_err = NULL;
e1c57ab8
PB
1513
1514 size = TARGET_PAGE_ALIGN(size);
62be4e3a 1515 max_size = TARGET_PAGE_ALIGN(max_size);
e1c57ab8
PB
1516 new_block = g_malloc0(sizeof(*new_block));
1517 new_block->mr = mr;
62be4e3a 1518 new_block->resized = resized;
9b8424d5
MT
1519 new_block->used_length = size;
1520 new_block->max_length = max_size;
62be4e3a 1521 assert(max_size >= size);
e1c57ab8
PB
1522 new_block->fd = -1;
1523 new_block->host = host;
1524 if (host) {
7bd4f430 1525 new_block->flags |= RAM_PREALLOC;
e1c57ab8 1526 }
62be4e3a
MT
1527 if (resizeable) {
1528 new_block->flags |= RAM_RESIZEABLE;
1529 }
ef701d7b
HT
1530 addr = ram_block_add(new_block, &local_err);
1531 if (local_err) {
1532 g_free(new_block);
1533 error_propagate(errp, local_err);
1534 return -1;
1535 }
1536 return addr;
e1c57ab8
PB
1537}
1538
62be4e3a
MT
1539ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1540 MemoryRegion *mr, Error **errp)
1541{
1542 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1543}
1544
ef701d7b 1545ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
6977dfe6 1546{
62be4e3a
MT
1547 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1548}
1549
1550ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1551 void (*resized)(const char*,
1552 uint64_t length,
1553 void *host),
1554 MemoryRegion *mr, Error **errp)
1555{
1556 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
6977dfe6
YT
1557}
1558
1f2e98b6
AW
1559void qemu_ram_free_from_ptr(ram_addr_t addr)
1560{
1561 RAMBlock *block;
1562
b2a8658e 1563 qemu_mutex_lock_ramlist();
0dc3f44a 1564 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1f2e98b6 1565 if (addr == block->offset) {
0dc3f44a 1566 QLIST_REMOVE_RCU(block, next);
0d6d3c87 1567 ram_list.mru_block = NULL;
0dc3f44a
MD
1568 /* Write list before version */
1569 smp_wmb();
f798b07f 1570 ram_list.version++;
43771539 1571 g_free_rcu(block, rcu);
b2a8658e 1572 break;
1f2e98b6
AW
1573 }
1574 }
b2a8658e 1575 qemu_mutex_unlock_ramlist();
1f2e98b6
AW
1576}
1577
43771539
PB
/*
 * Release the host memory behind @block according to how it was obtained,
 * then free the RAMBlock itself.  Passed to call_rcu() by qemu_ram_free().
 */
1578static void reclaim_ramblock(RAMBlock *block)
1579{
/* RAM_PREALLOC is set when the caller supplied the host buffer, so there is
 * nothing of ours to release.
 */
1580 if (block->flags & RAM_PREALLOC) {
1581 ;
1582 } else if (xen_enabled()) {
1583 xen_invalidate_map_cache_entry(block->host);
1584#ifndef _WIN32
/* A valid fd means the block was mmap()ed from a file. */
1585 } else if (block->fd >= 0) {
1586 munmap(block->host, block->max_length);
1587 close(block->fd);
1588#endif
1589 } else {
1590 qemu_anon_ram_free(block->host, block->max_length);
1591 }
1592 g_free(block);
1593}
1594
c227f099 1595void qemu_ram_free(ram_addr_t addr)
e9a1ab19 1596{
04b16653
AW
1597 RAMBlock *block;
1598
b2a8658e 1599 qemu_mutex_lock_ramlist();
0dc3f44a 1600 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
04b16653 1601 if (addr == block->offset) {
0dc3f44a 1602 QLIST_REMOVE_RCU(block, next);
0d6d3c87 1603 ram_list.mru_block = NULL;
0dc3f44a
MD
1604 /* Write list before version */
1605 smp_wmb();
f798b07f 1606 ram_list.version++;
43771539 1607 call_rcu(block, reclaim_ramblock, rcu);
b2a8658e 1608 break;
04b16653
AW
1609 }
1610 }
b2a8658e 1611 qemu_mutex_unlock_ramlist();
e9a1ab19
FB
1612}
1613
cd19cfa2
HY
1614#ifndef _WIN32
/*
 * Replace @length bytes of host memory backing RAM address @addr with a
 * fresh mapping at the same host virtual address (MAP_FIXED).
 *
 * Blocks flagged RAM_PREALLOC are left alone; under Xen this aborts.  If
 * mmap() does not return the requested address the process exits.
 */
1615void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1616{
1617 RAMBlock *block;
1618 ram_addr_t offset;
1619 int flags;
1620 void *area, *vaddr;
1621
0dc3f44a 1622 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
/* Unsigned subtraction: the test below is true only when addr lies inside
 * this block.
 */
cd19cfa2 1623 offset = addr - block->offset;
9b8424d5 1624 if (offset < block->max_length) {
1240be24 1625 vaddr = ramblock_ptr(block, offset);
7bd4f430 1626 if (block->flags & RAM_PREALLOC) {
cd19cfa2 1627 ;
dfeaf2ab
MA
1628 } else if (xen_enabled()) {
1629 abort();
cd19cfa2
HY
1630 } else {
1631 flags = MAP_FIXED;
/* File-backed block: map the file again at the same offset. */
3435f395 1632 if (block->fd >= 0) {
dbcb8981
PB
1633 flags |= (block->flags & RAM_SHARED ?
1634 MAP_SHARED : MAP_PRIVATE);
3435f395
MA
1635 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1636 flags, block->fd, offset);
cd19cfa2 1637 } else {
2eb9fbaa
MA
1638 /*
1639 * Remap needs to match alloc. Accelerators that
1640 * set phys_mem_alloc never remap. If they did,
1641 * we'd need a remap hook here.
1642 */
1643 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1644
cd19cfa2
HY
1645 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1646 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1647 flags, -1, 0);
cd19cfa2
HY
1648 }
1649 if (area != vaddr) {
f15fbc4b
AP
1650 fprintf(stderr, "Could not remap addr: "
1651 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
cd19cfa2
HY
1652 length, addr);
1653 exit(1);
1654 }
/* Re-apply the merge and dump settings to the new mapping. */
8490fc78 1655 memory_try_enable_merging(vaddr, length);
ddb97f1d 1656 qemu_ram_setup_dump(vaddr, length);
cd19cfa2 1657 }
cd19cfa2
HY
1658 }
1659 }
1660}
1661#endif /* !_WIN32 */
1662
a35ba7be
PB
1663int qemu_get_ram_fd(ram_addr_t addr)
1664{
ae3a7047
MD
1665 RAMBlock *block;
1666 int fd;
a35ba7be 1667
0dc3f44a 1668 rcu_read_lock();
ae3a7047
MD
1669 block = qemu_get_ram_block(addr);
1670 fd = block->fd;
0dc3f44a 1671 rcu_read_unlock();
ae3a7047 1672 return fd;
a35ba7be
PB
1673}
1674
3fd74b84
DM
1675void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1676{
ae3a7047
MD
1677 RAMBlock *block;
1678 void *ptr;
3fd74b84 1679
0dc3f44a 1680 rcu_read_lock();
ae3a7047
MD
1681 block = qemu_get_ram_block(addr);
1682 ptr = ramblock_ptr(block, 0);
0dc3f44a 1683 rcu_read_unlock();
ae3a7047 1684 return ptr;
3fd74b84
DM
1685}
1686
1b5ec234 1687/* Return a host pointer to ram allocated with qemu_ram_alloc.
ae3a7047
MD
1688 * This should not be used for general purpose DMA. Use address_space_map
1689 * or address_space_rw instead. For local memory (e.g. video ram) that the
1690 * device owns, use memory_region_get_ram_ptr.
0dc3f44a
MD
1691 *
1692 * By the time this function returns, the returned pointer is not protected
1693 * by RCU anymore. If the caller is not within an RCU critical section and
1694 * does not hold the iothread lock, it must have other means of protecting the
1695 * pointer, such as a reference to the region that includes the incoming
1696 * ram_addr_t.
1b5ec234
PB
1697 */
1698void *qemu_get_ram_ptr(ram_addr_t addr)
1699{
ae3a7047
MD
1700 RAMBlock *block;
1701 void *ptr;
1b5ec234 1702
0dc3f44a 1703 rcu_read_lock();
ae3a7047
MD
1704 block = qemu_get_ram_block(addr);
1705
1706 if (xen_enabled() && block->host == NULL) {
0d6d3c87
PB
1707 /* We need to check if the requested address is in the RAM
1708 * because we don't want to map the entire memory in QEMU.
1709 * In that case just map until the end of the page.
1710 */
1711 if (block->offset == 0) {
ae3a7047 1712 ptr = xen_map_cache(addr, 0, 0);
0dc3f44a 1713 goto unlock;
0d6d3c87 1714 }
ae3a7047
MD
1715
1716 block->host = xen_map_cache(block->offset, block->max_length, 1);
0d6d3c87 1717 }
ae3a7047
MD
1718 ptr = ramblock_ptr(block, addr - block->offset);
1719
0dc3f44a
MD
1720unlock:
1721 rcu_read_unlock();
ae3a7047 1722 return ptr;
dc828ca1
PB
1723}
1724
38bee5dc 1725/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
ae3a7047 1726 * but takes a size argument.
0dc3f44a
MD
1727 *
1728 * By the time this function returns, the returned pointer is not protected
1729 * by RCU anymore. If the caller is not within an RCU critical section and
1730 * does not hold the iothread lock, it must have other means of protecting the
1731 * pointer, such as a reference to the region that includes the incoming
1732 * ram_addr_t.
ae3a7047 1733 */
cb85f7ab 1734static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
38bee5dc 1735{
ae3a7047 1736 void *ptr;
8ab934f9
SS
1737 if (*size == 0) {
1738 return NULL;
1739 }
868bb33f 1740 if (xen_enabled()) {
e41d7c69 1741 return xen_map_cache(addr, *size, 1);
868bb33f 1742 } else {
38bee5dc 1743 RAMBlock *block;
0dc3f44a
MD
1744 rcu_read_lock();
1745 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5
MT
1746 if (addr - block->offset < block->max_length) {
1747 if (addr - block->offset + *size > block->max_length)
1748 *size = block->max_length - addr + block->offset;
ae3a7047 1749 ptr = ramblock_ptr(block, addr - block->offset);
0dc3f44a 1750 rcu_read_unlock();
ae3a7047 1751 return ptr;
38bee5dc
SS
1752 }
1753 }
1754
1755 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1756 abort();
38bee5dc
SS
1757 }
1758}
1759
7443b437 1760/* Some of the softmmu routines need to translate from a host pointer
ae3a7047
MD
1761 * (typically a TLB entry) back to a ram offset.
1762 *
1763 * By the time this function returns, the returned pointer is not protected
1764 * by RCU anymore. If the caller is not within an RCU critical section and
1765 * does not hold the iothread lock, it must have other means of protecting the
1766 * pointer, such as a reference to the region that includes the incoming
1767 * ram_addr_t.
1768 */
1b5ec234 1769MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
5579c7f3 1770{
94a6b54f
PB
1771 RAMBlock *block;
1772 uint8_t *host = ptr;
ae3a7047 1773 MemoryRegion *mr;
94a6b54f 1774
868bb33f 1775 if (xen_enabled()) {
0dc3f44a 1776 rcu_read_lock();
e41d7c69 1777 *ram_addr = xen_ram_addr_from_mapcache(ptr);
ae3a7047 1778 mr = qemu_get_ram_block(*ram_addr)->mr;
0dc3f44a 1779 rcu_read_unlock();
ae3a7047 1780 return mr;
712c2b41
SS
1781 }
1782
0dc3f44a
MD
1783 rcu_read_lock();
1784 block = atomic_rcu_read(&ram_list.mru_block);
9b8424d5 1785 if (block && block->host && host - block->host < block->max_length) {
23887b79
PB
1786 goto found;
1787 }
1788
0dc3f44a 1789 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
432d268c
JN
1790 /* This case append when the block is not mapped. */
1791 if (block->host == NULL) {
1792 continue;
1793 }
9b8424d5 1794 if (host - block->host < block->max_length) {
23887b79 1795 goto found;
f471a17e 1796 }
94a6b54f 1797 }
432d268c 1798
0dc3f44a 1799 rcu_read_unlock();
1b5ec234 1800 return NULL;
23887b79
PB
1801
1802found:
1803 *ram_addr = block->offset + (host - block->host);
ae3a7047 1804 mr = block->mr;
0dc3f44a 1805 rcu_read_unlock();
ae3a7047 1806 return mr;
e890261f 1807}
f471a17e 1808
a8170e5e 1809static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
0e0df1e2 1810 uint64_t val, unsigned size)
9fa3e853 1811{
52159192 1812 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
0e0df1e2 1813 tb_invalidate_phys_page_fast(ram_addr, size);
3a7d929e 1814 }
0e0df1e2
AK
1815 switch (size) {
1816 case 1:
1817 stb_p(qemu_get_ram_ptr(ram_addr), val);
1818 break;
1819 case 2:
1820 stw_p(qemu_get_ram_ptr(ram_addr), val);
1821 break;
1822 case 4:
1823 stl_p(qemu_get_ram_ptr(ram_addr), val);
1824 break;
1825 default:
1826 abort();
3a7d929e 1827 }
6886867e 1828 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
f23db169
FB
1829 /* we remove the notdirty callback only if the code has been
1830 flushed */
a2cd8c85 1831 if (!cpu_physical_memory_is_clean(ram_addr)) {
4917cf44 1832 CPUArchState *env = current_cpu->env_ptr;
93afeade 1833 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
4917cf44 1834 }
9fa3e853
FB
1835}
1836
b018ddf6
PB
1837static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1838 unsigned size, bool is_write)
1839{
1840 return is_write;
1841}
1842
/* Region ops built from notdirty_mem_write and notdirty_mem_accepts; no
 * read callback is set.
 */
0e0df1e2 1843static const MemoryRegionOps notdirty_mem_ops = {
0e0df1e2 1844 .write = notdirty_mem_write,
b018ddf6 1845 .valid.accepts = notdirty_mem_accepts,
0e0df1e2 1846 .endianness = DEVICE_NATIVE_ENDIAN,
1ccde1cb
FB
1847};
1848
0f459d16 1849/* Generate a debug exception if a watchpoint has been hit. */
/* @offset is the offset of the access within its page (combined below with
 * the page part of cpu->mem_io_vaddr), @len its length and @flags the
 * access type (BP_MEM_READ or BP_MEM_WRITE) being checked.
 */
05068c0d 1850static void check_watchpoint(int offset, int len, int flags)
0f459d16 1851{
93afeade
AF
1852 CPUState *cpu = current_cpu;
1853 CPUArchState *env = cpu->env_ptr;
06d55cc1 1854 target_ulong pc, cs_base;
0f459d16 1855 target_ulong vaddr;
a1d1bb31 1856 CPUWatchpoint *wp;
06d55cc1 1857 int cpu_flags;
0f459d16 1858
ff4700b0 1859 if (cpu->watchpoint_hit) {
06d55cc1
AL
1860 /* We re-entered the check after replacing the TB. Now raise
1861 * the debug interrupt so that it will trigger after the
1862 * current instruction. */
93afeade 1863 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
06d55cc1
AL
1864 return;
1865 }
93afeade 1866 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
ff4700b0 1867 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
05068c0d
PM
1868 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1869 && (wp->flags & flags)) {
/* Record on the watchpoint which kind of access hit it, and where. */
08225676
PM
1870 if (flags == BP_MEM_READ) {
1871 wp->flags |= BP_WATCHPOINT_HIT_READ;
1872 } else {
1873 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1874 }
1875 wp->hitaddr = vaddr;
ff4700b0
AF
1876 if (!cpu->watchpoint_hit) {
1877 cpu->watchpoint_hit = wp;
239c51a5 1878 tb_check_watchpoint(cpu);
/* Either raise EXCP_DEBUG right away, or regenerate the TB and resume;
 * the re-entry case is handled at the top of this function.
 */
6e140f28 1879 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
27103424 1880 cpu->exception_index = EXCP_DEBUG;
5638d180 1881 cpu_loop_exit(cpu);
6e140f28
AL
1882 } else {
1883 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
648f034c 1884 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
0ea8cb88 1885 cpu_resume_from_signal(cpu, NULL);
6e140f28 1886 }
06d55cc1 1887 }
6e140f28
AL
1888 } else {
1889 wp->flags &= ~BP_WATCHPOINT_HIT;
0f459d16
PB
1890 }
1891 }
1892}
1893
6658ffb8
PB
1894/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1895 so these check for a hit then pass through to the normal out-of-line
1896 phys routines. */
a8170e5e 1897static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1ec9b909 1898 unsigned size)
6658ffb8 1899{
05068c0d 1900 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_READ);
1ec9b909 1901 switch (size) {
2c17449b 1902 case 1: return ldub_phys(&address_space_memory, addr);
41701aa4 1903 case 2: return lduw_phys(&address_space_memory, addr);
fdfba1a2 1904 case 4: return ldl_phys(&address_space_memory, addr);
1ec9b909
AK
1905 default: abort();
1906 }
6658ffb8
PB
1907}
1908
a8170e5e 1909static void watch_mem_write(void *opaque, hwaddr addr,
1ec9b909 1910 uint64_t val, unsigned size)
6658ffb8 1911{
05068c0d 1912 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, BP_MEM_WRITE);
1ec9b909 1913 switch (size) {
67364150 1914 case 1:
db3be60d 1915 stb_phys(&address_space_memory, addr, val);
67364150
MF
1916 break;
1917 case 2:
5ce5944d 1918 stw_phys(&address_space_memory, addr, val);
67364150
MF
1919 break;
1920 case 4:
ab1da857 1921 stl_phys(&address_space_memory, addr, val);
67364150 1922 break;
1ec9b909
AK
1923 default: abort();
1924 }
6658ffb8
PB
1925}
1926
1ec9b909
AK
/* Region ops that route reads and writes through watch_mem_read and
 * watch_mem_write.
 */
1927static const MemoryRegionOps watch_mem_ops = {
1928 .read = watch_mem_read,
1929 .write = watch_mem_write,
1930 .endianness = DEVICE_NATIVE_ENDIAN,
6658ffb8 1931};
6658ffb8 1932
a8170e5e 1933static uint64_t subpage_read(void *opaque, hwaddr addr,
70c68e44 1934 unsigned len)
db7b5426 1935{
acc9d80b 1936 subpage_t *subpage = opaque;
ff6cff75 1937 uint8_t buf[8];
791af8c8 1938
db7b5426 1939#if defined(DEBUG_SUBPAGE)
016e9d62 1940 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
acc9d80b 1941 subpage, len, addr);
db7b5426 1942#endif
acc9d80b
JK
1943 address_space_read(subpage->as, addr + subpage->base, buf, len);
1944 switch (len) {
1945 case 1:
1946 return ldub_p(buf);
1947 case 2:
1948 return lduw_p(buf);
1949 case 4:
1950 return ldl_p(buf);
ff6cff75
PB
1951 case 8:
1952 return ldq_p(buf);
acc9d80b
JK
1953 default:
1954 abort();
1955 }
db7b5426
BS
1956}
1957
a8170e5e 1958static void subpage_write(void *opaque, hwaddr addr,
70c68e44 1959 uint64_t value, unsigned len)
db7b5426 1960{
acc9d80b 1961 subpage_t *subpage = opaque;
ff6cff75 1962 uint8_t buf[8];
acc9d80b 1963
db7b5426 1964#if defined(DEBUG_SUBPAGE)
016e9d62 1965 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
acc9d80b
JK
1966 " value %"PRIx64"\n",
1967 __func__, subpage, len, addr, value);
db7b5426 1968#endif
acc9d80b
JK
1969 switch (len) {
1970 case 1:
1971 stb_p(buf, value);
1972 break;
1973 case 2:
1974 stw_p(buf, value);
1975 break;
1976 case 4:
1977 stl_p(buf, value);
1978 break;
ff6cff75
PB
1979 case 8:
1980 stq_p(buf, value);
1981 break;
acc9d80b
JK
1982 default:
1983 abort();
1984 }
1985 address_space_write(subpage->as, addr + subpage->base, buf, len);
db7b5426
BS
1986}
1987
c353e4cc 1988static bool subpage_accepts(void *opaque, hwaddr addr,
016e9d62 1989 unsigned len, bool is_write)
c353e4cc 1990{
acc9d80b 1991 subpage_t *subpage = opaque;
c353e4cc 1992#if defined(DEBUG_SUBPAGE)
016e9d62 1993 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
acc9d80b 1994 __func__, subpage, is_write ? 'w' : 'r', len, addr);
c353e4cc
PB
1995#endif
1996
acc9d80b 1997 return address_space_access_valid(subpage->as, addr + subpage->base,
016e9d62 1998 len, is_write);
c353e4cc
PB
1999}
2000
70c68e44
AK
2001static const MemoryRegionOps subpage_ops = {
2002 .read = subpage_read,
2003 .write = subpage_write,
ff6cff75
PB
2004 .impl.min_access_size = 1,
2005 .impl.max_access_size = 8,
2006 .valid.min_access_size = 1,
2007 .valid.max_access_size = 8,
c353e4cc 2008 .valid.accepts = subpage_accepts,
70c68e44 2009 .endianness = DEVICE_NATIVE_ENDIAN,
db7b5426
BS
2010};
2011
c227f099 2012static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
5312bd8b 2013 uint16_t section)
db7b5426
BS
2014{
2015 int idx, eidx;
2016
2017 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2018 return -1;
2019 idx = SUBPAGE_IDX(start);
2020 eidx = SUBPAGE_IDX(end);
2021#if defined(DEBUG_SUBPAGE)
016e9d62
AK
2022 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2023 __func__, mmio, start, end, idx, eidx, section);
db7b5426 2024#endif
db7b5426 2025 for (; idx <= eidx; idx++) {
5312bd8b 2026 mmio->sub_section[idx] = section;
db7b5426
BS
2027 }
2028
2029 return 0;
2030}
2031
acc9d80b 2032static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
db7b5426 2033{
c227f099 2034 subpage_t *mmio;
db7b5426 2035
7267c094 2036 mmio = g_malloc0(sizeof(subpage_t));
1eec614b 2037
acc9d80b 2038 mmio->as = as;
1eec614b 2039 mmio->base = base;
2c9b15ca 2040 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
b4fefef9 2041 NULL, TARGET_PAGE_SIZE);
b3b00c78 2042 mmio->iomem.subpage = true;
db7b5426 2043#if defined(DEBUG_SUBPAGE)
016e9d62
AK
2044 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2045 mmio, base, TARGET_PAGE_SIZE);
db7b5426 2046#endif
b41aac4f 2047 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
db7b5426
BS
2048
2049 return mmio;
2050}
2051
a656e22f
PC
2052static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2053 MemoryRegion *mr)
5312bd8b 2054{
a656e22f 2055 assert(as);
5312bd8b 2056 MemoryRegionSection section = {
a656e22f 2057 .address_space = as,
5312bd8b
AK
2058 .mr = mr,
2059 .offset_within_address_space = 0,
2060 .offset_within_region = 0,
052e87b0 2061 .size = int128_2_64(),
5312bd8b
AK
2062 };
2063
53cb28cb 2064 return phys_section_add(map, &section);
5312bd8b
AK
2065}
2066
9d82b5a7 2067MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
aa102231 2068{
79e2b9ae
PB
2069 AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
2070 MemoryRegionSection *sections = d->map.sections;
9d82b5a7
PB
2071
2072 return sections[index & ~TARGET_PAGE_MASK].mr;
aa102231
AK
2073}
2074
e9179ce1
AK
2075static void io_mem_init(void)
2076{
1f6245e5 2077 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2c9b15ca 2078 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1f6245e5 2079 NULL, UINT64_MAX);
2c9b15ca 2080 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1f6245e5 2081 NULL, UINT64_MAX);
2c9b15ca 2082 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1f6245e5 2083 NULL, UINT64_MAX);
e9179ce1
AK
2084}
2085
ac1970fb 2086static void mem_begin(MemoryListener *listener)
00752703
PB
2087{
2088 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
53cb28cb
MA
2089 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2090 uint16_t n;
2091
a656e22f 2092 n = dummy_section(&d->map, as, &io_mem_unassigned);
53cb28cb 2093 assert(n == PHYS_SECTION_UNASSIGNED);
a656e22f 2094 n = dummy_section(&d->map, as, &io_mem_notdirty);
53cb28cb 2095 assert(n == PHYS_SECTION_NOTDIRTY);
a656e22f 2096 n = dummy_section(&d->map, as, &io_mem_rom);
53cb28cb 2097 assert(n == PHYS_SECTION_ROM);
a656e22f 2098 n = dummy_section(&d->map, as, &io_mem_watch);
53cb28cb 2099 assert(n == PHYS_SECTION_WATCH);
00752703 2100
9736e55b 2101 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
00752703
PB
2102 d->as = as;
2103 as->next_dispatch = d;
2104}
2105
79e2b9ae
PB
2106static void address_space_dispatch_free(AddressSpaceDispatch *d)
2107{
2108 phys_sections_free(&d->map);
2109 g_free(d);
2110}
2111
00752703 2112static void mem_commit(MemoryListener *listener)
ac1970fb 2113{
89ae337a 2114 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
0475d94f
PB
2115 AddressSpaceDispatch *cur = as->dispatch;
2116 AddressSpaceDispatch *next = as->next_dispatch;
2117
53cb28cb 2118 phys_page_compact_all(next, next->map.nodes_nb);
b35ba30f 2119
79e2b9ae 2120 atomic_rcu_set(&as->dispatch, next);
53cb28cb 2121 if (cur) {
79e2b9ae 2122 call_rcu(cur, address_space_dispatch_free, rcu);
53cb28cb 2123 }
9affd6fc
PB
2124}
2125
1d71148e 2126static void tcg_commit(MemoryListener *listener)
50c1e149 2127{
182735ef 2128 CPUState *cpu;
117712c3
AK
2129
2130 /* since each CPU stores ram addresses in its TLB cache, we must
2131 reset the modified entries */
2132 /* XXX: slow ! */
bdc44640 2133 CPU_FOREACH(cpu) {
33bde2e1
EI
2134 /* FIXME: Disentangle the cpu.h circular files deps so we can
2135 directly get the right CPU from listener. */
2136 if (cpu->tcg_as_listener != listener) {
2137 continue;
2138 }
76e5c76f 2139 cpu_reload_memory_map(cpu);
117712c3 2140 }
50c1e149
AK
2141}
2142
93632747
AK
2143static void core_log_global_start(MemoryListener *listener)
2144{
981fdf23 2145 cpu_physical_memory_set_dirty_tracking(true);
93632747
AK
2146}
2147
2148static void core_log_global_stop(MemoryListener *listener)
2149{
981fdf23 2150 cpu_physical_memory_set_dirty_tracking(false);
93632747
AK
2151}
2152
93632747 2153static MemoryListener core_memory_listener = {
93632747
AK
2154 .log_global_start = core_log_global_start,
2155 .log_global_stop = core_log_global_stop,
ac1970fb 2156 .priority = 1,
93632747
AK
2157};
2158
ac1970fb
AK
2159void address_space_init_dispatch(AddressSpace *as)
2160{
00752703 2161 as->dispatch = NULL;
89ae337a 2162 as->dispatch_listener = (MemoryListener) {
ac1970fb 2163 .begin = mem_begin,
00752703 2164 .commit = mem_commit,
ac1970fb
AK
2165 .region_add = mem_add,
2166 .region_nop = mem_add,
2167 .priority = 0,
2168 };
89ae337a 2169 memory_listener_register(&as->dispatch_listener, as);
ac1970fb
AK
2170}
2171
6e48e8f9
PB
2172void address_space_unregister(AddressSpace *as)
2173{
2174 memory_listener_unregister(&as->dispatch_listener);
2175}
2176
83f3c251
AK
2177void address_space_destroy_dispatch(AddressSpace *as)
2178{
2179 AddressSpaceDispatch *d = as->dispatch;
2180
79e2b9ae
PB
2181 atomic_rcu_set(&as->dispatch, NULL);
2182 if (d) {
2183 call_rcu(d, address_space_dispatch_free, rcu);
2184 }
83f3c251
AK
2185}
2186
62152b8a
AK
2187static void memory_map_init(void)
2188{
7267c094 2189 system_memory = g_malloc(sizeof(*system_memory));
03f49957 2190
57271d63 2191 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
7dca8043 2192 address_space_init(&address_space_memory, system_memory, "memory");
309cb471 2193
7267c094 2194 system_io = g_malloc(sizeof(*system_io));
3bb28b72
JK
2195 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2196 65536);
7dca8043 2197 address_space_init(&address_space_io, system_io, "I/O");
93632747 2198
f6790af6 2199 memory_listener_register(&core_memory_listener, &address_space_memory);
62152b8a
AK
2200}
2201
2202MemoryRegion *get_system_memory(void)
2203{
2204 return system_memory;
2205}
2206
309cb471
AK
2207MemoryRegion *get_system_io(void)
2208{
2209 return system_io;
2210}
2211
e2eef170
PB
2212#endif /* !defined(CONFIG_USER_ONLY) */
2213
13eb76e0
FB
2214/* physical memory access (slow version, mainly for debug) */
2215#if defined(CONFIG_USER_ONLY)
f17ec444 2216int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
a68fe89c 2217 uint8_t *buf, int len, int is_write)
13eb76e0
FB
2218{
2219 int l, flags;
2220 target_ulong page;
53a5960a 2221 void * p;
13eb76e0
FB
2222
2223 while (len > 0) {
2224 page = addr & TARGET_PAGE_MASK;
2225 l = (page + TARGET_PAGE_SIZE) - addr;
2226 if (l > len)
2227 l = len;
2228 flags = page_get_flags(page);
2229 if (!(flags & PAGE_VALID))
a68fe89c 2230 return -1;
13eb76e0
FB
2231 if (is_write) {
2232 if (!(flags & PAGE_WRITE))
a68fe89c 2233 return -1;
579a97f7 2234 /* XXX: this code should not depend on lock_user */
72fb7daa 2235 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
a68fe89c 2236 return -1;
72fb7daa
AJ
2237 memcpy(p, buf, l);
2238 unlock_user(p, addr, l);
13eb76e0
FB
2239 } else {
2240 if (!(flags & PAGE_READ))
a68fe89c 2241 return -1;
579a97f7 2242 /* XXX: this code should not depend on lock_user */
72fb7daa 2243 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
a68fe89c 2244 return -1;
72fb7daa 2245 memcpy(buf, p, l);
5b257578 2246 unlock_user(p, addr, 0);
13eb76e0
FB
2247 }
2248 len -= l;
2249 buf += l;
2250 addr += l;
2251 }
a68fe89c 2252 return 0;
13eb76e0 2253}
8df1cd07 2254
13eb76e0 2255#else
51d7a9eb 2256
a8170e5e
AK
2257static void invalidate_and_set_dirty(hwaddr addr,
2258 hwaddr length)
51d7a9eb 2259{
f874bf90
PM
2260 if (cpu_physical_memory_range_includes_clean(addr, length)) {
2261 tb_invalidate_phys_range(addr, addr + length, 0);
6886867e 2262 cpu_physical_memory_set_dirty_range_nocode(addr, length);
51d7a9eb 2263 }
e226939d 2264 xen_modified_memory(addr, length);
51d7a9eb
AP
2265}
2266
23326164 2267static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
82f2563f 2268{
e1622f4b 2269 unsigned access_size_max = mr->ops->valid.max_access_size;
23326164
RH
2270
2271 /* Regions are assumed to support 1-4 byte accesses unless
2272 otherwise specified. */
23326164
RH
2273 if (access_size_max == 0) {
2274 access_size_max = 4;
2275 }
2276
2277 /* Bound the maximum access by the alignment of the address. */
2278 if (!mr->ops->impl.unaligned) {
2279 unsigned align_size_max = addr & -addr;
2280 if (align_size_max != 0 && align_size_max < access_size_max) {
2281 access_size_max = align_size_max;
2282 }
82f2563f 2283 }
23326164
RH
2284
2285 /* Don't attempt accesses larger than the maximum. */
2286 if (l > access_size_max) {
2287 l = access_size_max;
82f2563f 2288 }
098178f2
PB
2289 if (l & (l - 1)) {
2290 l = 1 << (qemu_fls(l) - 1);
2291 }
23326164
RH
2292
2293 return l;
82f2563f
PB
2294}
2295
fd8aaa76 2296bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
ac1970fb 2297 int len, bool is_write)
13eb76e0 2298{
149f54b5 2299 hwaddr l;
13eb76e0 2300 uint8_t *ptr;
791af8c8 2301 uint64_t val;
149f54b5 2302 hwaddr addr1;
5c8a00ce 2303 MemoryRegion *mr;
fd8aaa76 2304 bool error = false;
3b46e624 2305
13eb76e0 2306 while (len > 0) {
149f54b5 2307 l = len;
5c8a00ce 2308 mr = address_space_translate(as, addr, &addr1, &l, is_write);
3b46e624 2309
13eb76e0 2310 if (is_write) {
5c8a00ce
PB
2311 if (!memory_access_is_direct(mr, is_write)) {
2312 l = memory_access_size(mr, l, addr1);
4917cf44 2313 /* XXX: could force current_cpu to NULL to avoid
6a00d601 2314 potential bugs */
23326164
RH
2315 switch (l) {
2316 case 8:
2317 /* 64 bit write access */
2318 val = ldq_p(buf);
2319 error |= io_mem_write(mr, addr1, val, 8);
2320 break;
2321 case 4:
1c213d19 2322 /* 32 bit write access */
c27004ec 2323 val = ldl_p(buf);
5c8a00ce 2324 error |= io_mem_write(mr, addr1, val, 4);
23326164
RH
2325 break;
2326 case 2:
1c213d19 2327 /* 16 bit write access */
c27004ec 2328 val = lduw_p(buf);
5c8a00ce 2329 error |= io_mem_write(mr, addr1, val, 2);
23326164
RH
2330 break;
2331 case 1:
1c213d19 2332 /* 8 bit write access */
c27004ec 2333 val = ldub_p(buf);
5c8a00ce 2334 error |= io_mem_write(mr, addr1, val, 1);
23326164
RH
2335 break;
2336 default:
2337 abort();
13eb76e0 2338 }
2bbfa05d 2339 } else {
5c8a00ce 2340 addr1 += memory_region_get_ram_addr(mr);
13eb76e0 2341 /* RAM case */
5579c7f3 2342 ptr = qemu_get_ram_ptr(addr1);
13eb76e0 2343 memcpy(ptr, buf, l);
51d7a9eb 2344 invalidate_and_set_dirty(addr1, l);
13eb76e0
FB
2345 }
2346 } else {
5c8a00ce 2347 if (!memory_access_is_direct(mr, is_write)) {
13eb76e0 2348 /* I/O case */
5c8a00ce 2349 l = memory_access_size(mr, l, addr1);
23326164
RH
2350 switch (l) {
2351 case 8:
2352 /* 64 bit read access */
2353 error |= io_mem_read(mr, addr1, &val, 8);
2354 stq_p(buf, val);
2355 break;
2356 case 4:
13eb76e0 2357 /* 32 bit read access */
5c8a00ce 2358 error |= io_mem_read(mr, addr1, &val, 4);
c27004ec 2359 stl_p(buf, val);
23326164
RH
2360 break;
2361 case 2:
13eb76e0 2362 /* 16 bit read access */
5c8a00ce 2363 error |= io_mem_read(mr, addr1, &val, 2);
c27004ec 2364 stw_p(buf, val);
23326164
RH
2365 break;
2366 case 1:
1c213d19 2367 /* 8 bit read access */
5c8a00ce 2368 error |= io_mem_read(mr, addr1, &val, 1);
c27004ec 2369 stb_p(buf, val);
23326164
RH
2370 break;
2371 default:
2372 abort();
13eb76e0
FB
2373 }
2374 } else {
2375 /* RAM case */
5c8a00ce 2376 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
f3705d53 2377 memcpy(buf, ptr, l);
13eb76e0
FB
2378 }
2379 }
2380 len -= l;
2381 buf += l;
2382 addr += l;
2383 }
fd8aaa76
PB
2384
2385 return error;
13eb76e0 2386}
8df1cd07 2387
fd8aaa76 2388bool address_space_write(AddressSpace *as, hwaddr addr,
ac1970fb
AK
2389 const uint8_t *buf, int len)
2390{
fd8aaa76 2391 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
ac1970fb
AK
2392}
2393
fd8aaa76 2394bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
ac1970fb 2395{
fd8aaa76 2396 return address_space_rw(as, addr, buf, len, false);
ac1970fb
AK
2397}
2398
2399
a8170e5e 2400void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
ac1970fb
AK
2401 int len, int is_write)
2402{
fd8aaa76 2403 address_space_rw(&address_space_memory, addr, buf, len, is_write);
ac1970fb
AK
2404}
2405
/* Operation selector for cpu_physical_memory_write_rom_internal().  */
enum write_rom_type {
    WRITE_DATA,     /* copy the caller's buffer into the RAM/ROM range */
    FLUSH_CACHE,    /* flush the host icache for the RAM/ROM range */
};
2410
2a221651 2411static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
582b55a9 2412 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
d0ecd2aa 2413{
149f54b5 2414 hwaddr l;
d0ecd2aa 2415 uint8_t *ptr;
149f54b5 2416 hwaddr addr1;
5c8a00ce 2417 MemoryRegion *mr;
3b46e624 2418
d0ecd2aa 2419 while (len > 0) {
149f54b5 2420 l = len;
2a221651 2421 mr = address_space_translate(as, addr, &addr1, &l, true);
3b46e624 2422
5c8a00ce
PB
2423 if (!(memory_region_is_ram(mr) ||
2424 memory_region_is_romd(mr))) {
d0ecd2aa
FB
2425 /* do nothing */
2426 } else {
5c8a00ce 2427 addr1 += memory_region_get_ram_addr(mr);
d0ecd2aa 2428 /* ROM/RAM case */
5579c7f3 2429 ptr = qemu_get_ram_ptr(addr1);
582b55a9
AG
2430 switch (type) {
2431 case WRITE_DATA:
2432 memcpy(ptr, buf, l);
2433 invalidate_and_set_dirty(addr1, l);
2434 break;
2435 case FLUSH_CACHE:
2436 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2437 break;
2438 }
d0ecd2aa
FB
2439 }
2440 len -= l;
2441 buf += l;
2442 addr += l;
2443 }
2444}
2445
582b55a9 2446/* used for ROM loading : can write in RAM and ROM */
2a221651 2447void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
582b55a9
AG
2448 const uint8_t *buf, int len)
2449{
2a221651 2450 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
582b55a9
AG
2451}
2452
2453void cpu_flush_icache_range(hwaddr start, int len)
2454{
2455 /*
2456 * This function should do the same thing as an icache flush that was
2457 * triggered from within the guest. For TCG we are always cache coherent,
2458 * so there is no need to flush anything. For KVM / Xen we need to flush
2459 * the host's instruction cache at least.
2460 */
2461 if (tcg_enabled()) {
2462 return;
2463 }
2464
2a221651
EI
2465 cpu_physical_memory_write_rom_internal(&address_space_memory,
2466 start, NULL, len, FLUSH_CACHE);
582b55a9
AG
2467}
2468
6d16c2f8 2469typedef struct {
d3e71559 2470 MemoryRegion *mr;
6d16c2f8 2471 void *buffer;
a8170e5e
AK
2472 hwaddr addr;
2473 hwaddr len;
c2cba0ff 2474 bool in_use;
6d16c2f8
AL
2475} BounceBuffer;
2476
2477static BounceBuffer bounce;
2478
ba223c29 2479typedef struct MapClient {
e95205e1 2480 QEMUBH *bh;
72cf2d4f 2481 QLIST_ENTRY(MapClient) link;
ba223c29
AL
2482} MapClient;
2483
38e047b5 2484QemuMutex map_client_list_lock;
72cf2d4f
BS
2485static QLIST_HEAD(map_client_list, MapClient) map_client_list
2486 = QLIST_HEAD_INITIALIZER(map_client_list);
ba223c29 2487
e95205e1
FZ
2488static void cpu_unregister_map_client_do(MapClient *client)
2489{
2490 QLIST_REMOVE(client, link);
2491 g_free(client);
2492}
2493
33b6c2ed
FZ
2494static void cpu_notify_map_clients_locked(void)
2495{
2496 MapClient *client;
2497
2498 while (!QLIST_EMPTY(&map_client_list)) {
2499 client = QLIST_FIRST(&map_client_list);
e95205e1
FZ
2500 qemu_bh_schedule(client->bh);
2501 cpu_unregister_map_client_do(client);
33b6c2ed
FZ
2502 }
2503}
2504
e95205e1 2505void cpu_register_map_client(QEMUBH *bh)
ba223c29 2506{
7267c094 2507 MapClient *client = g_malloc(sizeof(*client));
ba223c29 2508
38e047b5 2509 qemu_mutex_lock(&map_client_list_lock);
e95205e1 2510 client->bh = bh;
72cf2d4f 2511 QLIST_INSERT_HEAD(&map_client_list, client, link);
33b6c2ed
FZ
2512 if (!atomic_read(&bounce.in_use)) {
2513 cpu_notify_map_clients_locked();
2514 }
38e047b5 2515 qemu_mutex_unlock(&map_client_list_lock);
ba223c29
AL
2516}
2517
38e047b5
FZ
2518void cpu_exec_init_all(void)
2519{
2520 qemu_mutex_init(&ram_list.mutex);
2521 memory_map_init();
2522 io_mem_init();
2523 qemu_mutex_init(&map_client_list_lock);
2524}
2525
e95205e1 2526void cpu_unregister_map_client(QEMUBH *bh)
ba223c29 2527{
e95205e1 2528 MapClient *client;
ba223c29 2529
e95205e1
FZ
2530 qemu_mutex_lock(&map_client_list_lock);
2531 QLIST_FOREACH(client, &map_client_list, link) {
2532 if (client->bh == bh) {
2533 cpu_unregister_map_client_do(client);
2534 break;
2535 }
2536 }
2537 qemu_mutex_unlock(&map_client_list_lock);
ba223c29
AL
2538}
2539
2540static void cpu_notify_map_clients(void)
2541{
38e047b5 2542 qemu_mutex_lock(&map_client_list_lock);
33b6c2ed 2543 cpu_notify_map_clients_locked();
38e047b5 2544 qemu_mutex_unlock(&map_client_list_lock);
ba223c29
AL
2545}
2546
51644ab7
PB
2547bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2548{
5c8a00ce 2549 MemoryRegion *mr;
51644ab7
PB
2550 hwaddr l, xlat;
2551
2552 while (len > 0) {
2553 l = len;
5c8a00ce
PB
2554 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2555 if (!memory_access_is_direct(mr, is_write)) {
2556 l = memory_access_size(mr, l, addr);
2557 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
51644ab7
PB
2558 return false;
2559 }
2560 }
2561
2562 len -= l;
2563 addr += l;
2564 }
2565 return true;
2566}
2567
6d16c2f8
AL
2568/* Map a physical memory region into a host virtual address.
2569 * May map a subset of the requested range, given by and returned in *plen.
2570 * May return NULL if resources needed to perform the mapping are exhausted.
2571 * Use only for reads OR writes - not for read-modify-write operations.
ba223c29
AL
2572 * Use cpu_register_map_client() to know when retrying the map operation is
2573 * likely to succeed.
6d16c2f8 2574 */
ac1970fb 2575void *address_space_map(AddressSpace *as,
a8170e5e
AK
2576 hwaddr addr,
2577 hwaddr *plen,
ac1970fb 2578 bool is_write)
6d16c2f8 2579{
a8170e5e 2580 hwaddr len = *plen;
e3127ae0
PB
2581 hwaddr done = 0;
2582 hwaddr l, xlat, base;
2583 MemoryRegion *mr, *this_mr;
2584 ram_addr_t raddr;
6d16c2f8 2585
e3127ae0
PB
2586 if (len == 0) {
2587 return NULL;
2588 }
38bee5dc 2589
e3127ae0
PB
2590 l = len;
2591 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2592 if (!memory_access_is_direct(mr, is_write)) {
c2cba0ff 2593 if (atomic_xchg(&bounce.in_use, true)) {
e3127ae0 2594 return NULL;
6d16c2f8 2595 }
e85d9db5
KW
2596 /* Avoid unbounded allocations */
2597 l = MIN(l, TARGET_PAGE_SIZE);
2598 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
e3127ae0
PB
2599 bounce.addr = addr;
2600 bounce.len = l;
d3e71559
PB
2601
2602 memory_region_ref(mr);
2603 bounce.mr = mr;
e3127ae0
PB
2604 if (!is_write) {
2605 address_space_read(as, addr, bounce.buffer, l);
8ab934f9 2606 }
6d16c2f8 2607
e3127ae0
PB
2608 *plen = l;
2609 return bounce.buffer;
2610 }
2611
2612 base = xlat;
2613 raddr = memory_region_get_ram_addr(mr);
2614
2615 for (;;) {
6d16c2f8
AL
2616 len -= l;
2617 addr += l;
e3127ae0
PB
2618 done += l;
2619 if (len == 0) {
2620 break;
2621 }
2622
2623 l = len;
2624 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2625 if (this_mr != mr || xlat != base + done) {
2626 break;
2627 }
6d16c2f8 2628 }
e3127ae0 2629
d3e71559 2630 memory_region_ref(mr);
e3127ae0
PB
2631 *plen = done;
2632 return qemu_ram_ptr_length(raddr + base, plen);
6d16c2f8
AL
2633}
2634
ac1970fb 2635/* Unmaps a memory region previously mapped by address_space_map().
6d16c2f8
AL
2636 * Will also mark the memory as dirty if is_write == 1. access_len gives
2637 * the amount of memory that was actually read or written by the caller.
2638 */
a8170e5e
AK
2639void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2640 int is_write, hwaddr access_len)
6d16c2f8
AL
2641{
2642 if (buffer != bounce.buffer) {
d3e71559
PB
2643 MemoryRegion *mr;
2644 ram_addr_t addr1;
2645
2646 mr = qemu_ram_addr_from_host(buffer, &addr1);
2647 assert(mr != NULL);
6d16c2f8 2648 if (is_write) {
6886867e 2649 invalidate_and_set_dirty(addr1, access_len);
6d16c2f8 2650 }
868bb33f 2651 if (xen_enabled()) {
e41d7c69 2652 xen_invalidate_map_cache_entry(buffer);
050a0ddf 2653 }
d3e71559 2654 memory_region_unref(mr);
6d16c2f8
AL
2655 return;
2656 }
2657 if (is_write) {
ac1970fb 2658 address_space_write(as, bounce.addr, bounce.buffer, access_len);
6d16c2f8 2659 }
f8a83245 2660 qemu_vfree(bounce.buffer);
6d16c2f8 2661 bounce.buffer = NULL;
d3e71559 2662 memory_region_unref(bounce.mr);
c2cba0ff 2663 atomic_mb_set(&bounce.in_use, false);
ba223c29 2664 cpu_notify_map_clients();
6d16c2f8 2665}
d0ecd2aa 2666
a8170e5e
AK
2667void *cpu_physical_memory_map(hwaddr addr,
2668 hwaddr *plen,
ac1970fb
AK
2669 int is_write)
2670{
2671 return address_space_map(&address_space_memory, addr, plen, is_write);
2672}
2673
a8170e5e
AK
2674void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2675 int is_write, hwaddr access_len)
ac1970fb
AK
2676{
2677 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2678}
2679
8df1cd07 2680/* warning: addr must be aligned */
fdfba1a2 2681static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
1e78bcc1 2682 enum device_endian endian)
8df1cd07 2683{
8df1cd07 2684 uint8_t *ptr;
791af8c8 2685 uint64_t val;
5c8a00ce 2686 MemoryRegion *mr;
149f54b5
PB
2687 hwaddr l = 4;
2688 hwaddr addr1;
8df1cd07 2689
fdfba1a2 2690 mr = address_space_translate(as, addr, &addr1, &l, false);
5c8a00ce 2691 if (l < 4 || !memory_access_is_direct(mr, false)) {
8df1cd07 2692 /* I/O case */
5c8a00ce 2693 io_mem_read(mr, addr1, &val, 4);
1e78bcc1
AG
2694#if defined(TARGET_WORDS_BIGENDIAN)
2695 if (endian == DEVICE_LITTLE_ENDIAN) {
2696 val = bswap32(val);
2697 }
2698#else
2699 if (endian == DEVICE_BIG_ENDIAN) {
2700 val = bswap32(val);
2701 }
2702#endif
8df1cd07
FB
2703 } else {
2704 /* RAM case */
5c8a00ce 2705 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2706 & TARGET_PAGE_MASK)
149f54b5 2707 + addr1);
1e78bcc1
AG
2708 switch (endian) {
2709 case DEVICE_LITTLE_ENDIAN:
2710 val = ldl_le_p(ptr);
2711 break;
2712 case DEVICE_BIG_ENDIAN:
2713 val = ldl_be_p(ptr);
2714 break;
2715 default:
2716 val = ldl_p(ptr);
2717 break;
2718 }
8df1cd07
FB
2719 }
2720 return val;
2721}
2722
fdfba1a2 2723uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2724{
fdfba1a2 2725 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2726}
2727
fdfba1a2 2728uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2729{
fdfba1a2 2730 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2731}
2732
fdfba1a2 2733uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2734{
fdfba1a2 2735 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2736}
2737
84b7b8e7 2738/* warning: addr must be aligned */
2c17449b 2739static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
1e78bcc1 2740 enum device_endian endian)
84b7b8e7 2741{
84b7b8e7
FB
2742 uint8_t *ptr;
2743 uint64_t val;
5c8a00ce 2744 MemoryRegion *mr;
149f54b5
PB
2745 hwaddr l = 8;
2746 hwaddr addr1;
84b7b8e7 2747
2c17449b 2748 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2749 false);
2750 if (l < 8 || !memory_access_is_direct(mr, false)) {
84b7b8e7 2751 /* I/O case */
5c8a00ce 2752 io_mem_read(mr, addr1, &val, 8);
968a5627
PB
2753#if defined(TARGET_WORDS_BIGENDIAN)
2754 if (endian == DEVICE_LITTLE_ENDIAN) {
2755 val = bswap64(val);
2756 }
2757#else
2758 if (endian == DEVICE_BIG_ENDIAN) {
2759 val = bswap64(val);
2760 }
84b7b8e7
FB
2761#endif
2762 } else {
2763 /* RAM case */
5c8a00ce 2764 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2765 & TARGET_PAGE_MASK)
149f54b5 2766 + addr1);
1e78bcc1
AG
2767 switch (endian) {
2768 case DEVICE_LITTLE_ENDIAN:
2769 val = ldq_le_p(ptr);
2770 break;
2771 case DEVICE_BIG_ENDIAN:
2772 val = ldq_be_p(ptr);
2773 break;
2774 default:
2775 val = ldq_p(ptr);
2776 break;
2777 }
84b7b8e7
FB
2778 }
2779 return val;
2780}
2781
2c17449b 2782uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2783{
2c17449b 2784 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2785}
2786
2c17449b 2787uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2788{
2c17449b 2789 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2790}
2791
2c17449b 2792uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2793{
2c17449b 2794 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2795}
2796
aab33094 2797/* XXX: optimize */
2c17449b 2798uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
aab33094
FB
2799{
2800 uint8_t val;
2c17449b 2801 address_space_rw(as, addr, &val, 1, 0);
aab33094
FB
2802 return val;
2803}
2804
733f0b02 2805/* warning: addr must be aligned */
41701aa4 2806static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
1e78bcc1 2807 enum device_endian endian)
aab33094 2808{
733f0b02
MT
2809 uint8_t *ptr;
2810 uint64_t val;
5c8a00ce 2811 MemoryRegion *mr;
149f54b5
PB
2812 hwaddr l = 2;
2813 hwaddr addr1;
733f0b02 2814
41701aa4 2815 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2816 false);
2817 if (l < 2 || !memory_access_is_direct(mr, false)) {
733f0b02 2818 /* I/O case */
5c8a00ce 2819 io_mem_read(mr, addr1, &val, 2);
1e78bcc1
AG
2820#if defined(TARGET_WORDS_BIGENDIAN)
2821 if (endian == DEVICE_LITTLE_ENDIAN) {
2822 val = bswap16(val);
2823 }
2824#else
2825 if (endian == DEVICE_BIG_ENDIAN) {
2826 val = bswap16(val);
2827 }
2828#endif
733f0b02
MT
2829 } else {
2830 /* RAM case */
5c8a00ce 2831 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
06ef3525 2832 & TARGET_PAGE_MASK)
149f54b5 2833 + addr1);
1e78bcc1
AG
2834 switch (endian) {
2835 case DEVICE_LITTLE_ENDIAN:
2836 val = lduw_le_p(ptr);
2837 break;
2838 case DEVICE_BIG_ENDIAN:
2839 val = lduw_be_p(ptr);
2840 break;
2841 default:
2842 val = lduw_p(ptr);
2843 break;
2844 }
733f0b02
MT
2845 }
2846 return val;
aab33094
FB
2847}
2848
41701aa4 2849uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2850{
41701aa4 2851 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2852}
2853
41701aa4 2854uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2855{
41701aa4 2856 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2857}
2858
41701aa4 2859uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
1e78bcc1 2860{
41701aa4 2861 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2862}
2863
8df1cd07
FB
2864/* warning: addr must be aligned. The ram page is not masked as dirty
2865 and the code inside is not invalidated. It is useful if the dirty
2866 bits are used to track modified PTEs */
2198a121 2867void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
8df1cd07 2868{
8df1cd07 2869 uint8_t *ptr;
5c8a00ce 2870 MemoryRegion *mr;
149f54b5
PB
2871 hwaddr l = 4;
2872 hwaddr addr1;
8df1cd07 2873
2198a121 2874 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2875 true);
2876 if (l < 4 || !memory_access_is_direct(mr, true)) {
2877 io_mem_write(mr, addr1, val, 4);
8df1cd07 2878 } else {
5c8a00ce 2879 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
5579c7f3 2880 ptr = qemu_get_ram_ptr(addr1);
8df1cd07 2881 stl_p(ptr, val);
74576198
AL
2882
2883 if (unlikely(in_migration)) {
a2cd8c85 2884 if (cpu_physical_memory_is_clean(addr1)) {
74576198
AL
2885 /* invalidate code */
2886 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2887 /* set dirty bit */
6886867e 2888 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
74576198
AL
2889 }
2890 }
8df1cd07
FB
2891 }
2892}
2893
2894/* warning: addr must be aligned */
ab1da857
EI
2895static inline void stl_phys_internal(AddressSpace *as,
2896 hwaddr addr, uint32_t val,
1e78bcc1 2897 enum device_endian endian)
8df1cd07 2898{
8df1cd07 2899 uint8_t *ptr;
5c8a00ce 2900 MemoryRegion *mr;
149f54b5
PB
2901 hwaddr l = 4;
2902 hwaddr addr1;
8df1cd07 2903
ab1da857 2904 mr = address_space_translate(as, addr, &addr1, &l,
5c8a00ce
PB
2905 true);
2906 if (l < 4 || !memory_access_is_direct(mr, true)) {
1e78bcc1
AG
2907#if defined(TARGET_WORDS_BIGENDIAN)
2908 if (endian == DEVICE_LITTLE_ENDIAN) {
2909 val = bswap32(val);
2910 }
2911#else
2912 if (endian == DEVICE_BIG_ENDIAN) {
2913 val = bswap32(val);
2914 }
2915#endif
5c8a00ce 2916 io_mem_write(mr, addr1, val, 4);
8df1cd07 2917 } else {
8df1cd07 2918 /* RAM case */
5c8a00ce 2919 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
5579c7f3 2920 ptr = qemu_get_ram_ptr(addr1);
1e78bcc1
AG
2921 switch (endian) {
2922 case DEVICE_LITTLE_ENDIAN:
2923 stl_le_p(ptr, val);
2924 break;
2925 case DEVICE_BIG_ENDIAN:
2926 stl_be_p(ptr, val);
2927 break;
2928 default:
2929 stl_p(ptr, val);
2930 break;
2931 }
51d7a9eb 2932 invalidate_and_set_dirty(addr1, 4);
8df1cd07
FB
2933 }
2934}
2935
ab1da857 2936void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2937{
ab1da857 2938 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
2939}
2940
ab1da857 2941void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2942{
ab1da857 2943 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
2944}
2945
ab1da857 2946void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 2947{
ab1da857 2948 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
2949}
2950
aab33094 2951/* XXX: optimize */
db3be60d 2952void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
aab33094
FB
2953{
2954 uint8_t v = val;
db3be60d 2955 address_space_rw(as, addr, &v, 1, 1);
aab33094
FB
2956}
2957
733f0b02 2958/* warning: addr must be aligned */
5ce5944d
EI
2959static inline void stw_phys_internal(AddressSpace *as,
2960 hwaddr addr, uint32_t val,
1e78bcc1 2961 enum device_endian endian)
aab33094 2962{
733f0b02 2963 uint8_t *ptr;
5c8a00ce 2964 MemoryRegion *mr;
149f54b5
PB
2965 hwaddr l = 2;
2966 hwaddr addr1;
733f0b02 2967
5ce5944d 2968 mr = address_space_translate(as, addr, &addr1, &l, true);
5c8a00ce 2969 if (l < 2 || !memory_access_is_direct(mr, true)) {
1e78bcc1
AG
2970#if defined(TARGET_WORDS_BIGENDIAN)
2971 if (endian == DEVICE_LITTLE_ENDIAN) {
2972 val = bswap16(val);
2973 }
2974#else
2975 if (endian == DEVICE_BIG_ENDIAN) {
2976 val = bswap16(val);
2977 }
2978#endif
5c8a00ce 2979 io_mem_write(mr, addr1, val, 2);
733f0b02 2980 } else {
733f0b02 2981 /* RAM case */
5c8a00ce 2982 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
733f0b02 2983 ptr = qemu_get_ram_ptr(addr1);
1e78bcc1
AG
2984 switch (endian) {
2985 case DEVICE_LITTLE_ENDIAN:
2986 stw_le_p(ptr, val);
2987 break;
2988 case DEVICE_BIG_ENDIAN:
2989 stw_be_p(ptr, val);
2990 break;
2991 default:
2992 stw_p(ptr, val);
2993 break;
2994 }
51d7a9eb 2995 invalidate_and_set_dirty(addr1, 2);
733f0b02 2996 }
aab33094
FB
2997}
2998
5ce5944d 2999void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 3000{
5ce5944d 3001 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
1e78bcc1
AG
3002}
3003
5ce5944d 3004void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 3005{
5ce5944d 3006 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
1e78bcc1
AG
3007}
3008
5ce5944d 3009void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
1e78bcc1 3010{
5ce5944d 3011 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
1e78bcc1
AG
3012}
3013
aab33094 3014/* XXX: optimize */
f606604f 3015void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
aab33094
FB
3016{
3017 val = tswap64(val);
f606604f 3018 address_space_rw(as, addr, (void *) &val, 8, 1);
aab33094
FB
3019}
3020
f606604f 3021void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
1e78bcc1
AG
3022{
3023 val = cpu_to_le64(val);
f606604f 3024 address_space_rw(as, addr, (void *) &val, 8, 1);
1e78bcc1
AG
3025}
3026
f606604f 3027void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
1e78bcc1
AG
3028{
3029 val = cpu_to_be64(val);
f606604f 3030 address_space_rw(as, addr, (void *) &val, 8, 1);
1e78bcc1
AG
3031}
3032
5e2972fd 3033/* virtual memory access for debug (includes writing to ROM) */
f17ec444 3034int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
b448f2f3 3035 uint8_t *buf, int len, int is_write)
13eb76e0
FB
3036{
3037 int l;
a8170e5e 3038 hwaddr phys_addr;
9b3c35e0 3039 target_ulong page;
13eb76e0
FB
3040
3041 while (len > 0) {
3042 page = addr & TARGET_PAGE_MASK;
f17ec444 3043 phys_addr = cpu_get_phys_page_debug(cpu, page);
13eb76e0
FB
3044 /* if no physical page mapped, return an error */
3045 if (phys_addr == -1)
3046 return -1;
3047 l = (page + TARGET_PAGE_SIZE) - addr;
3048 if (l > len)
3049 l = len;
5e2972fd 3050 phys_addr += (addr & ~TARGET_PAGE_MASK);
2e38847b
EI
3051 if (is_write) {
3052 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3053 } else {
3054 address_space_rw(cpu->as, phys_addr, buf, l, 0);
3055 }
13eb76e0
FB
3056 len -= l;
3057 buf += l;
3058 addr += l;
3059 }
3060 return 0;
3061}
a68fe89c 3062#endif
13eb76e0 3063
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time and
 * false otherwise.  The prototype directly precedes the definition.
 */
bool target_words_bigendian(void);
bool target_words_bigendian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
3077
76f35538 3078#ifndef CONFIG_USER_ONLY
a8170e5e 3079bool cpu_physical_memory_is_io(hwaddr phys_addr)
76f35538 3080{
5c8a00ce 3081 MemoryRegion*mr;
149f54b5 3082 hwaddr l = 1;
76f35538 3083
5c8a00ce
PB
3084 mr = address_space_translate(&address_space_memory,
3085 phys_addr, &phys_addr, &l, false);
76f35538 3086
5c8a00ce
PB
3087 return !(memory_region_is_ram(mr) ||
3088 memory_region_is_romd(mr));
76f35538 3089}
bd2fa51f
MH
3090
3091void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3092{
3093 RAMBlock *block;
3094
0dc3f44a
MD
3095 rcu_read_lock();
3096 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
9b8424d5 3097 func(block->host, block->offset, block->used_length, opaque);
bd2fa51f 3098 }
0dc3f44a 3099 rcu_read_unlock();
bd2fa51f 3100}
ec3f8c99 3101#endif