1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "dma.h"
38 #include "exec-memory.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include <qemu.h>
41 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
42 #include <sys/param.h>
43 #if __FreeBSD_version >= 700104
44 #define HAVE_KINFO_GETVMMAP
45 #define sigqueue sigqueue_freebsd /* avoid redefinition */
46 #include <sys/time.h>
47 #include <sys/proc.h>
48 #include <machine/profile.h>
49 #define _KERNEL
50 #include <sys/user.h>
51 #undef _KERNEL
52 #undef sigqueue
53 #include <libutil.h>
54 #endif
55 #endif
56 #else /* !CONFIG_USER_ONLY */
57 #include "xen-mapcache.h"
58 #include "trace.h"
59 #endif
60
61 #include "cputlb.h"
62
63 #include "memory-internal.h"
64
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_UNASSIGNED
68
69 /* make various TB consistency checks */
70 //#define DEBUG_TB_CHECK
71
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
74
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
79
80 #define SMC_BITMAP_USE_THRESHOLD 10
81
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88
89 uint8_t *code_gen_prologue;
90 static uint8_t *code_gen_buffer;
91 static size_t code_gen_buffer_size;
92 /* threshold to flush the translated code buffer */
93 static size_t code_gen_buffer_max_size;
94 static uint8_t *code_gen_ptr;
95
96 #if !defined(CONFIG_USER_ONLY)
97 int phys_ram_fd;
98 static int in_migration;
99
100 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
101
102 static MemoryRegion *system_memory;
103 static MemoryRegion *system_io;
104
105 AddressSpace address_space_io;
106 AddressSpace address_space_memory;
107 DMAContext dma_context_memory;
108
109 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
110 static MemoryRegion io_mem_subpage_ram;
111
112 #endif
113
114 CPUArchState *first_cpu;
115 /* current CPU in the current thread. It is only valid inside
116 cpu_exec() */
117 DEFINE_TLS(CPUArchState *,cpu_single_env);
118 /* 0 = Do not count executed instructions.
119 1 = Precise instruction counting.
120 2 = Adaptive rate instruction counting. */
121 int use_icount = 0;
122
123 typedef struct PageDesc {
124 /* list of TBs intersecting this ram page */
125 TranslationBlock *first_tb;
126 /* in order to optimize self modifying code, we count the number
127 of lookups we do to a given page to use a bitmap */
128 unsigned int code_write_count;
129 uint8_t *code_bitmap;
130 #if defined(CONFIG_USER_ONLY)
131 unsigned long flags;
132 #endif
133 } PageDesc;
134
135 /* In system mode we want L1_MAP to be based on ram offsets,
136 while in user mode we want it to be based on virtual addresses. */
137 #if !defined(CONFIG_USER_ONLY)
138 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
139 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
140 #else
141 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
142 #endif
143 #else
144 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
145 #endif
146
147 /* Size of the L2 (and L3, etc) page tables. */
148 #define L2_BITS 10
149 #define L2_SIZE (1 << L2_BITS)
150
151 #define P_L2_LEVELS \
152 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
153
154 /* The bits remaining after N lower levels of page tables. */
155 #define V_L1_BITS_REM \
156 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
157
158 #if V_L1_BITS_REM < 4
159 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
160 #else
161 #define V_L1_BITS V_L1_BITS_REM
162 #endif
163
164 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
165
166 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
167
168 uintptr_t qemu_real_host_page_size;
169 uintptr_t qemu_host_page_size;
170 uintptr_t qemu_host_page_mask;
171
172 /* This is a multi-level map on the virtual address space.
173 The bottom level has pointers to PageDesc. */
174 static void *l1_map[V_L1_SIZE];
175
176 #if !defined(CONFIG_USER_ONLY)
177
178 static MemoryRegionSection *phys_sections;
179 static unsigned phys_sections_nb, phys_sections_nb_alloc;
180 static uint16_t phys_section_unassigned;
181 static uint16_t phys_section_notdirty;
182 static uint16_t phys_section_rom;
183 static uint16_t phys_section_watch;
184
185 /* Simple allocator for PhysPageEntry nodes */
186 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
187 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
188
189 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
190
191 static void io_mem_init(void);
192 static void memory_map_init(void);
193 static void *qemu_safe_ram_ptr(ram_addr_t addr);
194
195 static MemoryRegion io_mem_watch;
196 #endif
197 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
198 tb_page_addr_t phys_page2);
199
200 /* statistics */
201 static int tb_flush_count;
202 static int tb_phys_invalidate_count;
203
204 #ifdef _WIN32
205 static inline void map_exec(void *addr, long size)
206 {
207 DWORD old_protect;
208 VirtualProtect(addr, size,
209 PAGE_EXECUTE_READWRITE, &old_protect);
210
211 }
212 #else
213 static inline void map_exec(void *addr, long size)
214 {
215 unsigned long start, end, page_size;
216
217 page_size = getpagesize();
218 start = (unsigned long)addr;
219 start &= ~(page_size - 1);
220
221 end = (unsigned long)addr + size;
222 end += page_size - 1;
223 end &= ~(page_size - 1);
224
225 mprotect((void *)start, end - start,
226 PROT_READ | PROT_WRITE | PROT_EXEC);
227 }
228 #endif
229
230 static void page_init(void)
231 {
232 /* NOTE: we can always suppose that qemu_host_page_size >=
233 TARGET_PAGE_SIZE */
234 #ifdef _WIN32
235 {
236 SYSTEM_INFO system_info;
237
238 GetSystemInfo(&system_info);
239 qemu_real_host_page_size = system_info.dwPageSize;
240 }
241 #else
242 qemu_real_host_page_size = getpagesize();
243 #endif
244 if (qemu_host_page_size == 0)
245 qemu_host_page_size = qemu_real_host_page_size;
246 if (qemu_host_page_size < TARGET_PAGE_SIZE)
247 qemu_host_page_size = TARGET_PAGE_SIZE;
248 qemu_host_page_mask = ~(qemu_host_page_size - 1);
249
250 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
251 {
252 #ifdef HAVE_KINFO_GETVMMAP
253 struct kinfo_vmentry *freep;
254 int i, cnt;
255
256 freep = kinfo_getvmmap(getpid(), &cnt);
257 if (freep) {
258 mmap_lock();
259 for (i = 0; i < cnt; i++) {
260 unsigned long startaddr, endaddr;
261
262 startaddr = freep[i].kve_start;
263 endaddr = freep[i].kve_end;
264 if (h2g_valid(startaddr)) {
265 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
266
267 if (h2g_valid(endaddr)) {
268 endaddr = h2g(endaddr);
269 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
270 } else {
271 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
272 endaddr = ~0ul;
273 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
274 #endif
275 }
276 }
277 }
278 free(freep);
279 mmap_unlock();
280 }
281 #else
282 FILE *f;
283
284 last_brk = (unsigned long)sbrk(0);
285
286 f = fopen("/compat/linux/proc/self/maps", "r");
287 if (f) {
288 mmap_lock();
289
290 do {
291 unsigned long startaddr, endaddr;
292 int n;
293
294 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
295
296 if (n == 2 && h2g_valid(startaddr)) {
297 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
298
299 if (h2g_valid(endaddr)) {
300 endaddr = h2g(endaddr);
301 } else {
302 endaddr = ~0ul;
303 }
304 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
305 }
306 } while (!feof(f));
307
308 fclose(f);
309 mmap_unlock();
310 }
311 #endif
312 }
313 #endif
314 }
315
316 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
317 {
318 PageDesc *pd;
319 void **lp;
320 int i;
321
322 #if defined(CONFIG_USER_ONLY)
323 /* We can't use g_malloc because it may recurse into a locked mutex. */
324 # define ALLOC(P, SIZE) \
325 do { \
326 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
327 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
328 } while (0)
329 #else
330 # define ALLOC(P, SIZE) \
331 do { P = g_malloc0(SIZE); } while (0)
332 #endif
333
334 /* Level 1. Always allocated. */
335 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
336
337 /* Level 2..N-1. */
338 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
339 void **p = *lp;
340
341 if (p == NULL) {
342 if (!alloc) {
343 return NULL;
344 }
345 ALLOC(p, sizeof(void *) * L2_SIZE);
346 *lp = p;
347 }
348
349 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
350 }
351
352 pd = *lp;
353 if (pd == NULL) {
354 if (!alloc) {
355 return NULL;
356 }
357 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
358 *lp = pd;
359 }
360
361 #undef ALLOC
362
363 return pd + (index & (L2_SIZE - 1));
364 }
365
366 static inline PageDesc *page_find(tb_page_addr_t index)
367 {
368 return page_find_alloc(index, 0);
369 }
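/*
 * page_find() takes a page index rather than an address and never
 * allocates, so the typical lookup pattern throughout this file is
 *
 *     PageDesc *p = page_find(addr >> TARGET_PAGE_BITS);
 *     if (!p) {
 *         return;   /- nothing was ever recorded for that page -/
 *     }
 */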
370
371 #if !defined(CONFIG_USER_ONLY)
372
373 static void phys_map_node_reserve(unsigned nodes)
374 {
375 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
376 typedef PhysPageEntry Node[L2_SIZE];
377 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
378 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
379 phys_map_nodes_nb + nodes);
380 phys_map_nodes = g_renew(Node, phys_map_nodes,
381 phys_map_nodes_nb_alloc);
382 }
383 }
384
385 static uint16_t phys_map_node_alloc(void)
386 {
387 unsigned i;
388 uint16_t ret;
389
390 ret = phys_map_nodes_nb++;
391 assert(ret != PHYS_MAP_NODE_NIL);
392 assert(ret != phys_map_nodes_nb_alloc);
393 for (i = 0; i < L2_SIZE; ++i) {
394 phys_map_nodes[ret][i].is_leaf = 0;
395 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
396 }
397 return ret;
398 }
399
400 static void phys_map_nodes_reset(void)
401 {
402 phys_map_nodes_nb = 0;
403 }
404
405
406 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
407 hwaddr *nb, uint16_t leaf,
408 int level)
409 {
410 PhysPageEntry *p;
411 int i;
412 hwaddr step = (hwaddr)1 << (level * L2_BITS);
413
414 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
415 lp->ptr = phys_map_node_alloc();
416 p = phys_map_nodes[lp->ptr];
417 if (level == 0) {
418 for (i = 0; i < L2_SIZE; i++) {
419 p[i].is_leaf = 1;
420 p[i].ptr = phys_section_unassigned;
421 }
422 }
423 } else {
424 p = phys_map_nodes[lp->ptr];
425 }
426 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
427
428 while (*nb && lp < &p[L2_SIZE]) {
429 if ((*index & (step - 1)) == 0 && *nb >= step) {
430 lp->is_leaf = true;
431 lp->ptr = leaf;
432 *index += step;
433 *nb -= step;
434 } else {
435 phys_page_set_level(lp, index, nb, leaf, level - 1);
436 }
437 ++lp;
438 }
439 }
440
441 static void phys_page_set(AddressSpaceDispatch *d,
442 hwaddr index, hwaddr nb,
443 uint16_t leaf)
444 {
445 /* Wildly overreserve - it doesn't matter much. */
446 phys_map_node_reserve(3 * P_L2_LEVELS);
447
448 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
449 }
450
451 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
452 {
453 PhysPageEntry lp = d->phys_map;
454 PhysPageEntry *p;
455 int i;
456 uint16_t s_index = phys_section_unassigned;
457
458 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
459 if (lp.ptr == PHYS_MAP_NODE_NIL) {
460 goto not_found;
461 }
462 p = phys_map_nodes[lp.ptr];
463 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
464 }
465
466 s_index = lp.ptr;
467 not_found:
468 return &phys_sections[s_index];
469 }
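/*
 * phys_page_find() never returns NULL: unmapped addresses resolve to
 * &phys_sections[phys_section_unassigned], so callers test the section's
 * MemoryRegion rather than the pointer, e.g.
 *
 *     MemoryRegionSection *s = phys_page_find(d, addr >> TARGET_PAGE_BITS);
 *     if (!memory_region_is_ram(s->mr)) {
 *         ... not backed by RAM ...
 *     }
 */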
470
471 bool memory_region_is_unassigned(MemoryRegion *mr)
472 {
473 return mr != &io_mem_ram && mr != &io_mem_rom
474 && mr != &io_mem_notdirty && !mr->rom_device
475 && mr != &io_mem_watch;
476 }
477
478 #define mmap_lock() do { } while(0)
479 #define mmap_unlock() do { } while(0)
480 #endif
481
482 #if defined(CONFIG_USER_ONLY)
483 /* Currently it is not recommended to allocate big chunks of data in
484 user mode. It will change when a dedicated libc will be used. */
485 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
486 region in which the guest needs to run. Revisit this. */
487 #define USE_STATIC_CODE_GEN_BUFFER
488 #endif
489
490 /* ??? Should configure for this, not list operating systems here. */
491 #if (defined(__linux__) \
492 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
493 || defined(__DragonFly__) || defined(__OpenBSD__) \
494 || defined(__NetBSD__))
495 # define USE_MMAP
496 #endif
497
498 /* Minimum size of the code gen buffer. This number is randomly chosen,
499 but not so small that we can't have a fair number of TB's live. */
500 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
501
502 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
503 indicated, this is constrained by the range of direct branches on the
504 host cpu, as used by the TCG implementation of goto_tb. */
505 #if defined(__x86_64__)
506 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
507 #elif defined(__sparc__)
508 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
509 #elif defined(__arm__)
510 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
511 #elif defined(__s390x__)
512 /* We have a +- 4GB range on the branches; leave some slop. */
513 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
514 #else
515 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
516 #endif
517
518 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
519
520 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
521 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
522 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
523
524 static inline size_t size_code_gen_buffer(size_t tb_size)
525 {
526 /* Size the buffer. */
527 if (tb_size == 0) {
528 #ifdef USE_STATIC_CODE_GEN_BUFFER
529 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
530 #else
531 /* ??? Needs adjustments. */
532 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
533 static buffer, we could size this on RESERVED_VA, on the text
534 segment size of the executable, or continue to use the default. */
535 tb_size = (unsigned long)(ram_size / 4);
536 #endif
537 }
538 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
539 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
540 }
541 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
542 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
543 }
544 code_gen_buffer_size = tb_size;
545 return tb_size;
546 }
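/*
 * Example of the clamping above: size_code_gen_buffer(256 * 1024) is
 * raised to MIN_CODE_GEN_BUFFER_SIZE (1 MiB), while size_code_gen_buffer(0)
 * in a system-mode build requests ram_size / 4 and then caps it at
 * MAX_CODE_GEN_BUFFER_SIZE (2 GiB on an x86_64 host).
 */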
547
548 #ifdef USE_STATIC_CODE_GEN_BUFFER
549 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
550 __attribute__((aligned(CODE_GEN_ALIGN)));
551
552 static inline void *alloc_code_gen_buffer(void)
553 {
554 map_exec(static_code_gen_buffer, code_gen_buffer_size);
555 return static_code_gen_buffer;
556 }
557 #elif defined(USE_MMAP)
558 static inline void *alloc_code_gen_buffer(void)
559 {
560 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
561 uintptr_t start = 0;
562 void *buf;
563
564 /* Constrain the position of the buffer based on the host cpu.
565 Note that these addresses are chosen in concert with the
566 addresses assigned in the relevant linker script file. */
567 # if defined(__PIE__) || defined(__PIC__)
568 /* Don't bother setting a preferred location if we're building
569 a position-independent executable. We're more likely to get
570 an address near the main executable if we let the kernel
571 choose the address. */
572 # elif defined(__x86_64__) && defined(MAP_32BIT)
573 /* Force the memory down into low memory with the executable.
574 Leave the choice of exact location with the kernel. */
575 flags |= MAP_32BIT;
576 /* Cannot expect to map more than 800MB in low memory. */
577 if (code_gen_buffer_size > 800u * 1024 * 1024) {
578 code_gen_buffer_size = 800u * 1024 * 1024;
579 }
580 # elif defined(__sparc__)
581 start = 0x40000000ul;
582 # elif defined(__s390x__)
583 start = 0x90000000ul;
584 # endif
585
586 buf = mmap((void *)start, code_gen_buffer_size,
587 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
588 return buf == MAP_FAILED ? NULL : buf;
589 }
590 #else
591 static inline void *alloc_code_gen_buffer(void)
592 {
593 void *buf = g_malloc(code_gen_buffer_size);
594 if (buf) {
595 map_exec(buf, code_gen_buffer_size);
596 }
597 return buf;
598 }
599 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
600
601 static inline void code_gen_alloc(size_t tb_size)
602 {
603 code_gen_buffer_size = size_code_gen_buffer(tb_size);
604 code_gen_buffer = alloc_code_gen_buffer();
605 if (code_gen_buffer == NULL) {
606 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
607 exit(1);
608 }
609
610 /* Steal room for the prologue at the end of the buffer. This ensures
611 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
612        from TBs to the prologue are going to be in range.  It also means

613 that we don't need to mark (additional) portions of the data segment
614 as executable. */
615 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
616 code_gen_buffer_size -= 1024;
617
618 code_gen_buffer_max_size = code_gen_buffer_size -
619 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
620 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
621 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
622 }
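/*
 * Resulting layout: the last 1024 bytes of the mapping hold the TCG
 * prologue, and code_gen_buffer_max_size is reduced by a further
 * TCG_MAX_OP_SIZE * OPC_BUF_SIZE bytes of slack so that the TB currently
 * being generated can never run past the end of the buffer (tb_alloc()
 * checks code_gen_ptr against code_gen_buffer_max_size, not against the
 * raw buffer size).
 */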
623
624 /* Must be called before using the QEMU cpus. 'tb_size' is the size
625 (in bytes) allocated to the translation buffer. Zero means default
626 size. */
627 void tcg_exec_init(unsigned long tb_size)
628 {
629 cpu_gen_init();
630 code_gen_alloc(tb_size);
631 code_gen_ptr = code_gen_buffer;
632 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
633 page_init();
634 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
635 /* There's no guest base to take into account, so go ahead and
636 initialize the prologue now. */
637 tcg_prologue_init(&tcg_ctx);
638 #endif
639 }
640
641 bool tcg_enabled(void)
642 {
643 return code_gen_buffer != NULL;
644 }
645
646 void cpu_exec_init_all(void)
647 {
648 #if !defined(CONFIG_USER_ONLY)
649 memory_map_init();
650 io_mem_init();
651 #endif
652 }
653
654 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
655
656 static int cpu_common_post_load(void *opaque, int version_id)
657 {
658 CPUArchState *env = opaque;
659
660 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
661 version_id is increased. */
662 env->interrupt_request &= ~0x01;
663 tlb_flush(env, 1);
664
665 return 0;
666 }
667
668 static const VMStateDescription vmstate_cpu_common = {
669 .name = "cpu_common",
670 .version_id = 1,
671 .minimum_version_id = 1,
672 .minimum_version_id_old = 1,
673 .post_load = cpu_common_post_load,
674 .fields = (VMStateField []) {
675 VMSTATE_UINT32(halted, CPUArchState),
676 VMSTATE_UINT32(interrupt_request, CPUArchState),
677 VMSTATE_END_OF_LIST()
678 }
679 };
680 #endif
681
682 CPUArchState *qemu_get_cpu(int cpu)
683 {
684 CPUArchState *env = first_cpu;
685
686 while (env) {
687 if (env->cpu_index == cpu)
688 break;
689 env = env->next_cpu;
690 }
691
692 return env;
693 }
694
695 void cpu_exec_init(CPUArchState *env)
696 {
697 #ifndef CONFIG_USER_ONLY
698 CPUState *cpu = ENV_GET_CPU(env);
699 #endif
700 CPUArchState **penv;
701 int cpu_index;
702
703 #if defined(CONFIG_USER_ONLY)
704 cpu_list_lock();
705 #endif
706 env->next_cpu = NULL;
707 penv = &first_cpu;
708 cpu_index = 0;
709 while (*penv != NULL) {
710 penv = &(*penv)->next_cpu;
711 cpu_index++;
712 }
713 env->cpu_index = cpu_index;
714 env->numa_node = 0;
715 QTAILQ_INIT(&env->breakpoints);
716 QTAILQ_INIT(&env->watchpoints);
717 #ifndef CONFIG_USER_ONLY
718 cpu->thread_id = qemu_get_thread_id();
719 #endif
720 *penv = env;
721 #if defined(CONFIG_USER_ONLY)
722 cpu_list_unlock();
723 #endif
724 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
725 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
726 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
727 cpu_save, cpu_load, env);
728 #endif
729 }
730
731 /* Allocate a new translation block. Flush the translation buffer if
732 too many translation blocks or too much generated code. */
733 static TranslationBlock *tb_alloc(target_ulong pc)
734 {
735 TranslationBlock *tb;
736
737 if (nb_tbs >= code_gen_max_blocks ||
738 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
739 return NULL;
740 tb = &tbs[nb_tbs++];
741 tb->pc = pc;
742 tb->cflags = 0;
743 return tb;
744 }
745
746 void tb_free(TranslationBlock *tb)
747 {
748 /* In practice this is mostly used for single-use temporary TBs.
749 Ignore the hard cases and just back up if this TB happens to
750 be the last one generated. */
751 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
752 code_gen_ptr = tb->tc_ptr;
753 nb_tbs--;
754 }
755 }
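/*
 * TBs are not freed individually: tb_alloc() bumps an index into the
 * preallocated tbs[] array and tb_free() can only undo the most recent
 * allocation, so reclaiming space in the general case means a full
 * tb_flush().
 */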
756
757 static inline void invalidate_page_bitmap(PageDesc *p)
758 {
759 if (p->code_bitmap) {
760 g_free(p->code_bitmap);
761 p->code_bitmap = NULL;
762 }
763 p->code_write_count = 0;
764 }
765
766 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
767
768 static void page_flush_tb_1 (int level, void **lp)
769 {
770 int i;
771
772 if (*lp == NULL) {
773 return;
774 }
775 if (level == 0) {
776 PageDesc *pd = *lp;
777 for (i = 0; i < L2_SIZE; ++i) {
778 pd[i].first_tb = NULL;
779 invalidate_page_bitmap(pd + i);
780 }
781 } else {
782 void **pp = *lp;
783 for (i = 0; i < L2_SIZE; ++i) {
784 page_flush_tb_1 (level - 1, pp + i);
785 }
786 }
787 }
788
789 static void page_flush_tb(void)
790 {
791 int i;
792 for (i = 0; i < V_L1_SIZE; i++) {
793 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
794 }
795 }
796
797 /* flush all the translation blocks */
798 /* XXX: tb_flush is currently not thread safe */
799 void tb_flush(CPUArchState *env1)
800 {
801 CPUArchState *env;
802 #if defined(DEBUG_FLUSH)
803 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
804 (unsigned long)(code_gen_ptr - code_gen_buffer),
805 nb_tbs, nb_tbs > 0 ?
806 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
807 #endif
808 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
809 cpu_abort(env1, "Internal error: code buffer overflow\n");
810
811 nb_tbs = 0;
812
813 for(env = first_cpu; env != NULL; env = env->next_cpu) {
814 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
815 }
816
817 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
818 page_flush_tb();
819
820 code_gen_ptr = code_gen_buffer;
821 /* XXX: flush processor icache at this point if cache flush is
822 expensive */
823 tb_flush_count++;
824 }
825
826 #ifdef DEBUG_TB_CHECK
827
828 static void tb_invalidate_check(target_ulong address)
829 {
830 TranslationBlock *tb;
831 int i;
832 address &= TARGET_PAGE_MASK;
833 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
834 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
835 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
836 address >= tb->pc + tb->size)) {
837 printf("ERROR invalidate: address=" TARGET_FMT_lx
838 " PC=%08lx size=%04x\n",
839 address, (long)tb->pc, tb->size);
840 }
841 }
842 }
843 }
844
845 /* verify that all the pages have correct rights for code */
846 static void tb_page_check(void)
847 {
848 TranslationBlock *tb;
849 int i, flags1, flags2;
850
851 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
852 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
853 flags1 = page_get_flags(tb->pc);
854 flags2 = page_get_flags(tb->pc + tb->size - 1);
855 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
856 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
857 (long)tb->pc, tb->size, flags1, flags2);
858 }
859 }
860 }
861 }
862
863 #endif
864
865 /* invalidate one TB */
866 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
867 int next_offset)
868 {
869 TranslationBlock *tb1;
870 for(;;) {
871 tb1 = *ptb;
872 if (tb1 == tb) {
873 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
874 break;
875 }
876 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
877 }
878 }
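/*
 * The TB lists manipulated below keep a two-bit tag in the low bits of
 * each pointer: in the per-page lists the tag (0 or 1) selects which of
 * the TB's page_next[] slots continues the chain, and in the circular
 * jmp_first/jmp_next lists a tag of 2 marks the destination TB that heads
 * the list.  Hence the recurring pattern
 *
 *     n1  = (uintptr_t)tb1 & 3;
 *     tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
 */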
879
880 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
881 {
882 TranslationBlock *tb1;
883 unsigned int n1;
884
885 for(;;) {
886 tb1 = *ptb;
887 n1 = (uintptr_t)tb1 & 3;
888 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
889 if (tb1 == tb) {
890 *ptb = tb1->page_next[n1];
891 break;
892 }
893 ptb = &tb1->page_next[n1];
894 }
895 }
896
897 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
898 {
899 TranslationBlock *tb1, **ptb;
900 unsigned int n1;
901
902 ptb = &tb->jmp_next[n];
903 tb1 = *ptb;
904 if (tb1) {
905 /* find tb(n) in circular list */
906 for(;;) {
907 tb1 = *ptb;
908 n1 = (uintptr_t)tb1 & 3;
909 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
910 if (n1 == n && tb1 == tb)
911 break;
912 if (n1 == 2) {
913 ptb = &tb1->jmp_first;
914 } else {
915 ptb = &tb1->jmp_next[n1];
916 }
917 }
918 /* now we can suppress tb(n) from the list */
919 *ptb = tb->jmp_next[n];
920
921 tb->jmp_next[n] = NULL;
922 }
923 }
924
925 /* reset the jump entry 'n' of a TB so that it is not chained to
926 another TB */
927 static inline void tb_reset_jump(TranslationBlock *tb, int n)
928 {
929 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
930 }
931
932 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
933 {
934 CPUArchState *env;
935 PageDesc *p;
936 unsigned int h, n1;
937 tb_page_addr_t phys_pc;
938 TranslationBlock *tb1, *tb2;
939
940 /* remove the TB from the hash list */
941 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
942 h = tb_phys_hash_func(phys_pc);
943 tb_remove(&tb_phys_hash[h], tb,
944 offsetof(TranslationBlock, phys_hash_next));
945
946 /* remove the TB from the page list */
947 if (tb->page_addr[0] != page_addr) {
948 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
949 tb_page_remove(&p->first_tb, tb);
950 invalidate_page_bitmap(p);
951 }
952 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
953 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
954 tb_page_remove(&p->first_tb, tb);
955 invalidate_page_bitmap(p);
956 }
957
958 tb_invalidated_flag = 1;
959
960 /* remove the TB from the hash list */
961 h = tb_jmp_cache_hash_func(tb->pc);
962 for(env = first_cpu; env != NULL; env = env->next_cpu) {
963 if (env->tb_jmp_cache[h] == tb)
964 env->tb_jmp_cache[h] = NULL;
965 }
966
967 /* suppress this TB from the two jump lists */
968 tb_jmp_remove(tb, 0);
969 tb_jmp_remove(tb, 1);
970
971 /* suppress any remaining jumps to this TB */
972 tb1 = tb->jmp_first;
973 for(;;) {
974 n1 = (uintptr_t)tb1 & 3;
975 if (n1 == 2)
976 break;
977 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
978 tb2 = tb1->jmp_next[n1];
979 tb_reset_jump(tb1, n1);
980 tb1->jmp_next[n1] = NULL;
981 tb1 = tb2;
982 }
983 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
984
985 tb_phys_invalidate_count++;
986 }
987
988 static inline void set_bits(uint8_t *tab, int start, int len)
989 {
990 int end, mask, end1;
991
992 end = start + len;
993 tab += start >> 3;
994 mask = 0xff << (start & 7);
995 if ((start & ~7) == (end & ~7)) {
996 if (start < end) {
997 mask &= ~(0xff << (end & 7));
998 *tab |= mask;
999 }
1000 } else {
1001 *tab++ |= mask;
1002 start = (start + 8) & ~7;
1003 end1 = end & ~7;
1004 while (start < end1) {
1005 *tab++ = 0xff;
1006 start += 8;
1007 }
1008 if (start < end) {
1009 mask = ~(0xff << (end & 7));
1010 *tab |= mask;
1011 }
1012 }
1013 }
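/*
 * Example: set_bits(bitmap, 3, 7) marks bits 3..9, ORing 0xf8 into
 * bitmap[0] and 0x03 into bitmap[1].  build_page_bitmap() below uses this
 * with one bit per guest byte of the page that is covered by translated
 * code.
 */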
1014
1015 static void build_page_bitmap(PageDesc *p)
1016 {
1017 int n, tb_start, tb_end;
1018 TranslationBlock *tb;
1019
1020 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1021
1022 tb = p->first_tb;
1023 while (tb != NULL) {
1024 n = (uintptr_t)tb & 3;
1025 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1026 /* NOTE: this is subtle as a TB may span two physical pages */
1027 if (n == 0) {
1028 /* NOTE: tb_end may be after the end of the page, but
1029 it is not a problem */
1030 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1031 tb_end = tb_start + tb->size;
1032 if (tb_end > TARGET_PAGE_SIZE)
1033 tb_end = TARGET_PAGE_SIZE;
1034 } else {
1035 tb_start = 0;
1036 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1037 }
1038 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1039 tb = tb->page_next[n];
1040 }
1041 }
1042
1043 TranslationBlock *tb_gen_code(CPUArchState *env,
1044 target_ulong pc, target_ulong cs_base,
1045 int flags, int cflags)
1046 {
1047 TranslationBlock *tb;
1048 uint8_t *tc_ptr;
1049 tb_page_addr_t phys_pc, phys_page2;
1050 target_ulong virt_page2;
1051 int code_gen_size;
1052
1053 phys_pc = get_page_addr_code(env, pc);
1054 tb = tb_alloc(pc);
1055 if (!tb) {
1056 /* flush must be done */
1057 tb_flush(env);
1058 /* cannot fail at this point */
1059 tb = tb_alloc(pc);
1060 /* Don't forget to invalidate previous TB info. */
1061 tb_invalidated_flag = 1;
1062 }
1063 tc_ptr = code_gen_ptr;
1064 tb->tc_ptr = tc_ptr;
1065 tb->cs_base = cs_base;
1066 tb->flags = flags;
1067 tb->cflags = cflags;
1068 cpu_gen_code(env, tb, &code_gen_size);
1069 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1070 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1071
1072 /* check next page if needed */
1073 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1074 phys_page2 = -1;
1075 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1076 phys_page2 = get_page_addr_code(env, virt_page2);
1077 }
1078 tb_link_page(tb, phys_pc, phys_page2);
1079 return tb;
1080 }
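/*
 * Note on cflags: a value whose CF_COUNT_MASK field is 1, as passed by the
 * self-modifying-code handling further down, restricts the generated block
 * to a single guest instruction; 0 lets the translator choose the block
 * length itself.
 */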
1081
1082 /*
1083 * Invalidate all TBs which intersect with the target physical address range
1084 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1085 * 'is_cpu_write_access' should be true if called from a real cpu write
1086 * access: the virtual CPU will exit the current TB if code is modified inside
1087 * this TB.
1088 */
1089 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1090 int is_cpu_write_access)
1091 {
1092 while (start < end) {
1093 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1094 start &= TARGET_PAGE_MASK;
1095 start += TARGET_PAGE_SIZE;
1096 }
1097 }
1098
1099 /*
1100 * Invalidate all TBs which intersect with the target physical address range
1101 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1102 * 'is_cpu_write_access' should be true if called from a real cpu write
1103 * access: the virtual CPU will exit the current TB if code is modified inside
1104 * this TB.
1105 */
1106 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1107 int is_cpu_write_access)
1108 {
1109 TranslationBlock *tb, *tb_next, *saved_tb;
1110 CPUArchState *env = cpu_single_env;
1111 tb_page_addr_t tb_start, tb_end;
1112 PageDesc *p;
1113 int n;
1114 #ifdef TARGET_HAS_PRECISE_SMC
1115 int current_tb_not_found = is_cpu_write_access;
1116 TranslationBlock *current_tb = NULL;
1117 int current_tb_modified = 0;
1118 target_ulong current_pc = 0;
1119 target_ulong current_cs_base = 0;
1120 int current_flags = 0;
1121 #endif /* TARGET_HAS_PRECISE_SMC */
1122
1123 p = page_find(start >> TARGET_PAGE_BITS);
1124 if (!p)
1125 return;
1126 if (!p->code_bitmap &&
1127 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1128 is_cpu_write_access) {
1129 /* build code bitmap */
1130 build_page_bitmap(p);
1131 }
1132
1133 /* we remove all the TBs in the range [start, end[ */
1134 /* XXX: see if in some cases it could be faster to invalidate all the code */
1135 tb = p->first_tb;
1136 while (tb != NULL) {
1137 n = (uintptr_t)tb & 3;
1138 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1139 tb_next = tb->page_next[n];
1140 /* NOTE: this is subtle as a TB may span two physical pages */
1141 if (n == 0) {
1142 /* NOTE: tb_end may be after the end of the page, but
1143 it is not a problem */
1144 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1145 tb_end = tb_start + tb->size;
1146 } else {
1147 tb_start = tb->page_addr[1];
1148 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1149 }
1150 if (!(tb_end <= start || tb_start >= end)) {
1151 #ifdef TARGET_HAS_PRECISE_SMC
1152 if (current_tb_not_found) {
1153 current_tb_not_found = 0;
1154 current_tb = NULL;
1155 if (env->mem_io_pc) {
1156 /* now we have a real cpu fault */
1157 current_tb = tb_find_pc(env->mem_io_pc);
1158 }
1159 }
1160 if (current_tb == tb &&
1161 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1162 /* If we are modifying the current TB, we must stop
1163 its execution. We could be more precise by checking
1164 that the modification is after the current PC, but it
1165 would require a specialized function to partially
1166 restore the CPU state */
1167
1168 current_tb_modified = 1;
1169 cpu_restore_state(current_tb, env, env->mem_io_pc);
1170 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1171 &current_flags);
1172 }
1173 #endif /* TARGET_HAS_PRECISE_SMC */
1174 /* we need to do that to handle the case where a signal
1175 occurs while doing tb_phys_invalidate() */
1176 saved_tb = NULL;
1177 if (env) {
1178 saved_tb = env->current_tb;
1179 env->current_tb = NULL;
1180 }
1181 tb_phys_invalidate(tb, -1);
1182 if (env) {
1183 env->current_tb = saved_tb;
1184 if (env->interrupt_request && env->current_tb)
1185 cpu_interrupt(env, env->interrupt_request);
1186 }
1187 }
1188 tb = tb_next;
1189 }
1190 #if !defined(CONFIG_USER_ONLY)
1191 /* if no code remaining, no need to continue to use slow writes */
1192 if (!p->first_tb) {
1193 invalidate_page_bitmap(p);
1194 if (is_cpu_write_access) {
1195 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1196 }
1197 }
1198 #endif
1199 #ifdef TARGET_HAS_PRECISE_SMC
1200 if (current_tb_modified) {
1201 /* we generate a block containing just the instruction
1202 modifying the memory. It will ensure that it cannot modify
1203 itself */
1204 env->current_tb = NULL;
1205 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1206 cpu_resume_from_signal(env, NULL);
1207 }
1208 #endif
1209 }
1210
1211 /* len must be <= 8 and start must be a multiple of len */
1212 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1213 {
1214 PageDesc *p;
1215 int offset, b;
1216 #if 0
1217 if (1) {
1218 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1219 cpu_single_env->mem_io_vaddr, len,
1220 cpu_single_env->eip,
1221 cpu_single_env->eip +
1222 (intptr_t)cpu_single_env->segs[R_CS].base);
1223 }
1224 #endif
1225 p = page_find(start >> TARGET_PAGE_BITS);
1226 if (!p)
1227 return;
1228 if (p->code_bitmap) {
1229 offset = start & ~TARGET_PAGE_MASK;
1230 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1231 if (b & ((1 << len) - 1))
1232 goto do_invalidate;
1233 } else {
1234 do_invalidate:
1235 tb_invalidate_phys_page_range(start, start + len, 1);
1236 }
1237 }
1238
1239 #if !defined(CONFIG_SOFTMMU)
1240 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1241 uintptr_t pc, void *puc)
1242 {
1243 TranslationBlock *tb;
1244 PageDesc *p;
1245 int n;
1246 #ifdef TARGET_HAS_PRECISE_SMC
1247 TranslationBlock *current_tb = NULL;
1248 CPUArchState *env = cpu_single_env;
1249 int current_tb_modified = 0;
1250 target_ulong current_pc = 0;
1251 target_ulong current_cs_base = 0;
1252 int current_flags = 0;
1253 #endif
1254
1255 addr &= TARGET_PAGE_MASK;
1256 p = page_find(addr >> TARGET_PAGE_BITS);
1257 if (!p)
1258 return;
1259 tb = p->first_tb;
1260 #ifdef TARGET_HAS_PRECISE_SMC
1261 if (tb && pc != 0) {
1262 current_tb = tb_find_pc(pc);
1263 }
1264 #endif
1265 while (tb != NULL) {
1266 n = (uintptr_t)tb & 3;
1267 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1268 #ifdef TARGET_HAS_PRECISE_SMC
1269 if (current_tb == tb &&
1270 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1271 /* If we are modifying the current TB, we must stop
1272 its execution. We could be more precise by checking
1273 that the modification is after the current PC, but it
1274 would require a specialized function to partially
1275 restore the CPU state */
1276
1277 current_tb_modified = 1;
1278 cpu_restore_state(current_tb, env, pc);
1279 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1280 &current_flags);
1281 }
1282 #endif /* TARGET_HAS_PRECISE_SMC */
1283 tb_phys_invalidate(tb, addr);
1284 tb = tb->page_next[n];
1285 }
1286 p->first_tb = NULL;
1287 #ifdef TARGET_HAS_PRECISE_SMC
1288 if (current_tb_modified) {
1289 /* we generate a block containing just the instruction
1290 modifying the memory. It will ensure that it cannot modify
1291 itself */
1292 env->current_tb = NULL;
1293 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1294 cpu_resume_from_signal(env, puc);
1295 }
1296 #endif
1297 }
1298 #endif
1299
1300 /* add the tb in the target page and protect it if necessary */
1301 static inline void tb_alloc_page(TranslationBlock *tb,
1302 unsigned int n, tb_page_addr_t page_addr)
1303 {
1304 PageDesc *p;
1305 #ifndef CONFIG_USER_ONLY
1306 bool page_already_protected;
1307 #endif
1308
1309 tb->page_addr[n] = page_addr;
1310 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1311 tb->page_next[n] = p->first_tb;
1312 #ifndef CONFIG_USER_ONLY
1313 page_already_protected = p->first_tb != NULL;
1314 #endif
1315 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1316 invalidate_page_bitmap(p);
1317
1318 #if defined(TARGET_HAS_SMC) || 1
1319
1320 #if defined(CONFIG_USER_ONLY)
1321 if (p->flags & PAGE_WRITE) {
1322 target_ulong addr;
1323 PageDesc *p2;
1324 int prot;
1325
1326 /* force the host page as non writable (writes will have a
1327 page fault + mprotect overhead) */
1328 page_addr &= qemu_host_page_mask;
1329 prot = 0;
1330 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1331 addr += TARGET_PAGE_SIZE) {
1332
1333 p2 = page_find (addr >> TARGET_PAGE_BITS);
1334 if (!p2)
1335 continue;
1336 prot |= p2->flags;
1337 p2->flags &= ~PAGE_WRITE;
1338 }
1339 mprotect(g2h(page_addr), qemu_host_page_size,
1340 (prot & PAGE_BITS) & ~PAGE_WRITE);
1341 #ifdef DEBUG_TB_INVALIDATE
1342 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1343 page_addr);
1344 #endif
1345 }
1346 #else
1347 /* if some code is already present, then the pages are already
1348 protected. So we handle the case where only the first TB is
1349 allocated in a physical page */
1350 if (!page_already_protected) {
1351 tlb_protect_code(page_addr);
1352 }
1353 #endif
1354
1355 #endif /* TARGET_HAS_SMC */
1356 }
1357
1358 /* add a new TB and link it to the physical page tables. phys_page2 is
1359 (-1) to indicate that only one page contains the TB. */
1360 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1361 tb_page_addr_t phys_page2)
1362 {
1363 unsigned int h;
1364 TranslationBlock **ptb;
1365
1366 /* Grab the mmap lock to stop another thread invalidating this TB
1367 before we are done. */
1368 mmap_lock();
1369 /* add in the physical hash table */
1370 h = tb_phys_hash_func(phys_pc);
1371 ptb = &tb_phys_hash[h];
1372 tb->phys_hash_next = *ptb;
1373 *ptb = tb;
1374
1375 /* add in the page list */
1376 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1377 if (phys_page2 != -1)
1378 tb_alloc_page(tb, 1, phys_page2);
1379 else
1380 tb->page_addr[1] = -1;
1381
1382 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1383 tb->jmp_next[0] = NULL;
1384 tb->jmp_next[1] = NULL;
1385
1386 /* init original jump addresses */
1387 if (tb->tb_next_offset[0] != 0xffff)
1388 tb_reset_jump(tb, 0);
1389 if (tb->tb_next_offset[1] != 0xffff)
1390 tb_reset_jump(tb, 1);
1391
1392 #ifdef DEBUG_TB_CHECK
1393 tb_page_check();
1394 #endif
1395 mmap_unlock();
1396 }
1397
1398 #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
1399 /* check whether the given addr is in TCG generated code buffer or not */
1400 bool is_tcg_gen_code(uintptr_t tc_ptr)
1401 {
1402 /* This can be called during code generation, so code_gen_buffer_max_size
1403 is used instead of code_gen_ptr for upper boundary checking */
1404 return (tc_ptr >= (uintptr_t)code_gen_buffer &&
1405 tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size));
1406 }
1407 #endif
1408
1409 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1410 tb[1].tc_ptr. Return NULL if not found */
1411 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1412 {
1413 int m_min, m_max, m;
1414 uintptr_t v;
1415 TranslationBlock *tb;
1416
1417 if (nb_tbs <= 0)
1418 return NULL;
1419 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1420 tc_ptr >= (uintptr_t)code_gen_ptr) {
1421 return NULL;
1422 }
1423 /* binary search (cf Knuth) */
1424 m_min = 0;
1425 m_max = nb_tbs - 1;
1426 while (m_min <= m_max) {
1427 m = (m_min + m_max) >> 1;
1428 tb = &tbs[m];
1429 v = (uintptr_t)tb->tc_ptr;
1430 if (v == tc_ptr)
1431 return tb;
1432 else if (tc_ptr < v) {
1433 m_max = m - 1;
1434 } else {
1435 m_min = m + 1;
1436 }
1437 }
1438 return &tbs[m_max];
1439 }
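/*
 * The binary search above relies on tbs[] being filled in allocation order
 * while code_gen_ptr only grows between flushes, so tbs[i].tc_ptr is
 * strictly increasing and &tbs[m_max] is the TB whose generated code
 * contains tc_ptr.
 */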
1440
1441 static void tb_reset_jump_recursive(TranslationBlock *tb);
1442
1443 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1444 {
1445 TranslationBlock *tb1, *tb_next, **ptb;
1446 unsigned int n1;
1447
1448 tb1 = tb->jmp_next[n];
1449 if (tb1 != NULL) {
1450 /* find head of list */
1451 for(;;) {
1452 n1 = (uintptr_t)tb1 & 3;
1453 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1454 if (n1 == 2)
1455 break;
1456 tb1 = tb1->jmp_next[n1];
1457 }
1458         /* we are now sure that tb jumps to tb1 */
1459 tb_next = tb1;
1460
1461 /* remove tb from the jmp_first list */
1462 ptb = &tb_next->jmp_first;
1463 for(;;) {
1464 tb1 = *ptb;
1465 n1 = (uintptr_t)tb1 & 3;
1466 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1467 if (n1 == n && tb1 == tb)
1468 break;
1469 ptb = &tb1->jmp_next[n1];
1470 }
1471 *ptb = tb->jmp_next[n];
1472 tb->jmp_next[n] = NULL;
1473
1474 /* suppress the jump to next tb in generated code */
1475 tb_reset_jump(tb, n);
1476
1477 /* suppress jumps in the tb on which we could have jumped */
1478 tb_reset_jump_recursive(tb_next);
1479 }
1480 }
1481
1482 static void tb_reset_jump_recursive(TranslationBlock *tb)
1483 {
1484 tb_reset_jump_recursive2(tb, 0);
1485 tb_reset_jump_recursive2(tb, 1);
1486 }
1487
1488 #if defined(TARGET_HAS_ICE)
1489 #if defined(CONFIG_USER_ONLY)
1490 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1491 {
1492 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1493 }
1494 #else
1495 void tb_invalidate_phys_addr(hwaddr addr)
1496 {
1497 ram_addr_t ram_addr;
1498 MemoryRegionSection *section;
1499
1500 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1501 if (!(memory_region_is_ram(section->mr)
1502 || (section->mr->rom_device && section->mr->readable))) {
1503 return;
1504 }
1505 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1506 + memory_region_section_addr(section, addr);
1507 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1508 }
1509
1510 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1511 {
1512 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1513 (pc & ~TARGET_PAGE_MASK));
1514 }
1515 #endif
1516 #endif /* TARGET_HAS_ICE */
1517
1518 #if defined(CONFIG_USER_ONLY)
1519 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1520
1521 {
1522 }
1523
1524 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1525 int flags, CPUWatchpoint **watchpoint)
1526 {
1527 return -ENOSYS;
1528 }
1529 #else
1530 /* Add a watchpoint. */
1531 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1532 int flags, CPUWatchpoint **watchpoint)
1533 {
1534 target_ulong len_mask = ~(len - 1);
1535 CPUWatchpoint *wp;
1536
1537 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1538 if ((len & (len - 1)) || (addr & ~len_mask) ||
1539 len == 0 || len > TARGET_PAGE_SIZE) {
1540 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1541 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1542 return -EINVAL;
1543 }
1544 wp = g_malloc(sizeof(*wp));
1545
1546 wp->vaddr = addr;
1547 wp->len_mask = len_mask;
1548 wp->flags = flags;
1549
1550 /* keep all GDB-injected watchpoints in front */
1551 if (flags & BP_GDB)
1552 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1553 else
1554 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1555
1556 tlb_flush_page(env, addr);
1557
1558 if (watchpoint)
1559 *watchpoint = wp;
1560 return 0;
1561 }
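/*
 * Example (for some guest-virtual address 'addr'): watch an aligned 4-byte
 * location for reads, keeping it at the head of the list like the
 * gdbstub-injected watchpoints:
 *
 *     CPUWatchpoint *wp;
 *     cpu_watchpoint_insert(env, addr, 4, BP_MEM_READ | BP_GDB, &wp);
 *
 * len must be a power of two no larger than TARGET_PAGE_SIZE and addr must
 * be aligned to it, otherwise -EINVAL is returned.
 */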
1562
1563 /* Remove a specific watchpoint. */
1564 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1565 int flags)
1566 {
1567 target_ulong len_mask = ~(len - 1);
1568 CPUWatchpoint *wp;
1569
1570 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1571 if (addr == wp->vaddr && len_mask == wp->len_mask
1572 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1573 cpu_watchpoint_remove_by_ref(env, wp);
1574 return 0;
1575 }
1576 }
1577 return -ENOENT;
1578 }
1579
1580 /* Remove a specific watchpoint by reference. */
1581 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1582 {
1583 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1584
1585 tlb_flush_page(env, watchpoint->vaddr);
1586
1587 g_free(watchpoint);
1588 }
1589
1590 /* Remove all matching watchpoints. */
1591 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1592 {
1593 CPUWatchpoint *wp, *next;
1594
1595 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1596 if (wp->flags & mask)
1597 cpu_watchpoint_remove_by_ref(env, wp);
1598 }
1599 }
1600 #endif
1601
1602 /* Add a breakpoint. */
1603 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1604 CPUBreakpoint **breakpoint)
1605 {
1606 #if defined(TARGET_HAS_ICE)
1607 CPUBreakpoint *bp;
1608
1609 bp = g_malloc(sizeof(*bp));
1610
1611 bp->pc = pc;
1612 bp->flags = flags;
1613
1614 /* keep all GDB-injected breakpoints in front */
1615 if (flags & BP_GDB)
1616 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1617 else
1618 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1619
1620 breakpoint_invalidate(env, pc);
1621
1622 if (breakpoint)
1623 *breakpoint = bp;
1624 return 0;
1625 #else
1626 return -ENOSYS;
1627 #endif
1628 }
1629
1630 /* Remove a specific breakpoint. */
1631 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1632 {
1633 #if defined(TARGET_HAS_ICE)
1634 CPUBreakpoint *bp;
1635
1636 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1637 if (bp->pc == pc && bp->flags == flags) {
1638 cpu_breakpoint_remove_by_ref(env, bp);
1639 return 0;
1640 }
1641 }
1642 return -ENOENT;
1643 #else
1644 return -ENOSYS;
1645 #endif
1646 }
1647
1648 /* Remove a specific breakpoint by reference. */
1649 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1650 {
1651 #if defined(TARGET_HAS_ICE)
1652 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1653
1654 breakpoint_invalidate(env, breakpoint->pc);
1655
1656 g_free(breakpoint);
1657 #endif
1658 }
1659
1660 /* Remove all matching breakpoints. */
1661 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1662 {
1663 #if defined(TARGET_HAS_ICE)
1664 CPUBreakpoint *bp, *next;
1665
1666 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1667 if (bp->flags & mask)
1668 cpu_breakpoint_remove_by_ref(env, bp);
1669 }
1670 #endif
1671 }
1672
1673 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1674 CPU loop after each instruction */
1675 void cpu_single_step(CPUArchState *env, int enabled)
1676 {
1677 #if defined(TARGET_HAS_ICE)
1678 if (env->singlestep_enabled != enabled) {
1679 env->singlestep_enabled = enabled;
1680 if (kvm_enabled())
1681 kvm_update_guest_debug(env, 0);
1682 else {
1683 /* must flush all the translated code to avoid inconsistencies */
1684 /* XXX: only flush what is necessary */
1685 tb_flush(env);
1686 }
1687 }
1688 #endif
1689 }
1690
1691 static void cpu_unlink_tb(CPUArchState *env)
1692 {
1693 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1694 problem and hope the cpu will stop of its own accord. For userspace
1695 emulation this often isn't actually as bad as it sounds. Often
1696 signals are used primarily to interrupt blocking syscalls. */
1697 TranslationBlock *tb;
1698 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1699
1700 spin_lock(&interrupt_lock);
1701 tb = env->current_tb;
1702 /* if the cpu is currently executing code, we must unlink it and
1703 all the potentially executing TB */
1704 if (tb) {
1705 env->current_tb = NULL;
1706 tb_reset_jump_recursive(tb);
1707 }
1708 spin_unlock(&interrupt_lock);
1709 }
1710
1711 #ifndef CONFIG_USER_ONLY
1712 /* mask must never be zero, except for A20 change call */
1713 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1714 {
1715 CPUState *cpu = ENV_GET_CPU(env);
1716 int old_mask;
1717
1718 old_mask = env->interrupt_request;
1719 env->interrupt_request |= mask;
1720
1721 /*
1722 * If called from iothread context, wake the target cpu in
1723      * case it's halted.
1724 */
1725 if (!qemu_cpu_is_self(cpu)) {
1726 qemu_cpu_kick(cpu);
1727 return;
1728 }
1729
1730 if (use_icount) {
1731 env->icount_decr.u16.high = 0xffff;
1732 if (!can_do_io(env)
1733 && (mask & ~old_mask) != 0) {
1734 cpu_abort(env, "Raised interrupt while not in I/O function");
1735 }
1736 } else {
1737 cpu_unlink_tb(env);
1738 }
1739 }
1740
1741 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1742
1743 #else /* CONFIG_USER_ONLY */
1744
1745 void cpu_interrupt(CPUArchState *env, int mask)
1746 {
1747 env->interrupt_request |= mask;
1748 cpu_unlink_tb(env);
1749 }
1750 #endif /* CONFIG_USER_ONLY */
1751
1752 void cpu_reset_interrupt(CPUArchState *env, int mask)
1753 {
1754 env->interrupt_request &= ~mask;
1755 }
1756
1757 void cpu_exit(CPUArchState *env)
1758 {
1759 env->exit_request = 1;
1760 cpu_unlink_tb(env);
1761 }
1762
1763 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1764 {
1765 va_list ap;
1766 va_list ap2;
1767
1768 va_start(ap, fmt);
1769 va_copy(ap2, ap);
1770 fprintf(stderr, "qemu: fatal: ");
1771 vfprintf(stderr, fmt, ap);
1772 fprintf(stderr, "\n");
1773 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1774 if (qemu_log_enabled()) {
1775 qemu_log("qemu: fatal: ");
1776 qemu_log_vprintf(fmt, ap2);
1777 qemu_log("\n");
1778 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1779 qemu_log_flush();
1780 qemu_log_close();
1781 }
1782 va_end(ap2);
1783 va_end(ap);
1784 #if defined(CONFIG_USER_ONLY)
1785 {
1786 struct sigaction act;
1787 sigfillset(&act.sa_mask);
1788 act.sa_handler = SIG_DFL;
1789 sigaction(SIGABRT, &act, NULL);
1790 }
1791 #endif
1792 abort();
1793 }
1794
1795 CPUArchState *cpu_copy(CPUArchState *env)
1796 {
1797 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1798 CPUArchState *next_cpu = new_env->next_cpu;
1799 int cpu_index = new_env->cpu_index;
1800 #if defined(TARGET_HAS_ICE)
1801 CPUBreakpoint *bp;
1802 CPUWatchpoint *wp;
1803 #endif
1804
1805 memcpy(new_env, env, sizeof(CPUArchState));
1806
1807 /* Preserve chaining and index. */
1808 new_env->next_cpu = next_cpu;
1809 new_env->cpu_index = cpu_index;
1810
1811 /* Clone all break/watchpoints.
1812 Note: Once we support ptrace with hw-debug register access, make sure
1813 BP_CPU break/watchpoints are handled correctly on clone. */
1814 QTAILQ_INIT(&env->breakpoints);
1815 QTAILQ_INIT(&env->watchpoints);
1816 #if defined(TARGET_HAS_ICE)
1817 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1818 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1819 }
1820 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1821 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1822 wp->flags, NULL);
1823 }
1824 #endif
1825
1826 return new_env;
1827 }
1828
1829 #if !defined(CONFIG_USER_ONLY)
1830 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1831 {
1832 unsigned int i;
1833
1834 /* Discard jump cache entries for any tb which might potentially
1835 overlap the flushed page. */
1836 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1837 memset (&env->tb_jmp_cache[i], 0,
1838 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1839
1840 i = tb_jmp_cache_hash_page(addr);
1841 memset (&env->tb_jmp_cache[i], 0,
1842 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1843 }
1844
1845 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1846 uintptr_t length)
1847 {
1848 uintptr_t start1;
1849
1850 /* we modify the TLB cache so that the dirty bit will be set again
1851 when accessing the range */
1852 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1853 /* Check that we don't span multiple blocks - this breaks the
1854 address comparisons below. */
1855 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1856 != (end - 1) - start) {
1857 abort();
1858 }
1859 cpu_tlb_reset_dirty_all(start1, length);
1860
1861 }
1862
1863 /* Note: start and end must be within the same ram block. */
1864 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1865 int dirty_flags)
1866 {
1867 uintptr_t length;
1868
1869 start &= TARGET_PAGE_MASK;
1870 end = TARGET_PAGE_ALIGN(end);
1871
1872 length = end - start;
1873 if (length == 0)
1874 return;
1875 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1876
1877 if (tcg_enabled()) {
1878 tlb_reset_dirty_range_all(start, end, length);
1879 }
1880 }
1881
1882 static int cpu_physical_memory_set_dirty_tracking(int enable)
1883 {
1884 int ret = 0;
1885 in_migration = enable;
1886 return ret;
1887 }
1888
1889 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1890 MemoryRegionSection *section,
1891 target_ulong vaddr,
1892 hwaddr paddr,
1893 int prot,
1894 target_ulong *address)
1895 {
1896 hwaddr iotlb;
1897 CPUWatchpoint *wp;
1898
1899 if (memory_region_is_ram(section->mr)) {
1900 /* Normal RAM. */
1901 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1902 + memory_region_section_addr(section, paddr);
1903 if (!section->readonly) {
1904 iotlb |= phys_section_notdirty;
1905 } else {
1906 iotlb |= phys_section_rom;
1907 }
1908 } else {
1909 /* IO handlers are currently passed a physical address.
1910 It would be nice to pass an offset from the base address
1911 of that region. This would avoid having to special case RAM,
1912 and avoid full address decoding in every device.
1913 We can't use the high bits of pd for this because
1914 IO_MEM_ROMD uses these as a ram address. */
1915 iotlb = section - phys_sections;
1916 iotlb += memory_region_section_addr(section, paddr);
1917 }
1918
1919 /* Make accesses to pages with watchpoints go via the
1920 watchpoint trap routines. */
1921 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1922 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1923 /* Avoid trapping reads of pages with a write breakpoint. */
1924 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1925 iotlb = phys_section_watch + paddr;
1926 *address |= TLB_MMIO;
1927 break;
1928 }
1929 }
1930 }
1931
1932 return iotlb;
1933 }
1934
1935 #else
1936 /*
1937 * Walks guest process memory "regions" one by one
1938 * and calls callback function 'fn' for each region.
1939 */
1940
1941 struct walk_memory_regions_data
1942 {
1943 walk_memory_regions_fn fn;
1944 void *priv;
1945 uintptr_t start;
1946 int prot;
1947 };
1948
1949 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1950 abi_ulong end, int new_prot)
1951 {
1952 if (data->start != -1ul) {
1953 int rc = data->fn(data->priv, data->start, end, data->prot);
1954 if (rc != 0) {
1955 return rc;
1956 }
1957 }
1958
1959 data->start = (new_prot ? end : -1ul);
1960 data->prot = new_prot;
1961
1962 return 0;
1963 }
1964
1965 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1966 abi_ulong base, int level, void **lp)
1967 {
1968 abi_ulong pa;
1969 int i, rc;
1970
1971 if (*lp == NULL) {
1972 return walk_memory_regions_end(data, base, 0);
1973 }
1974
1975 if (level == 0) {
1976 PageDesc *pd = *lp;
1977 for (i = 0; i < L2_SIZE; ++i) {
1978 int prot = pd[i].flags;
1979
1980 pa = base | (i << TARGET_PAGE_BITS);
1981 if (prot != data->prot) {
1982 rc = walk_memory_regions_end(data, pa, prot);
1983 if (rc != 0) {
1984 return rc;
1985 }
1986 }
1987 }
1988 } else {
1989 void **pp = *lp;
1990 for (i = 0; i < L2_SIZE; ++i) {
1991 pa = base | ((abi_ulong)i <<
1992 (TARGET_PAGE_BITS + L2_BITS * level));
1993 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1994 if (rc != 0) {
1995 return rc;
1996 }
1997 }
1998 }
1999
2000 return 0;
2001 }
2002
2003 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2004 {
2005 struct walk_memory_regions_data data;
2006 uintptr_t i;
2007
2008 data.fn = fn;
2009 data.priv = priv;
2010 data.start = -1ul;
2011 data.prot = 0;
2012
2013 for (i = 0; i < V_L1_SIZE; i++) {
2014 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2015 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2016 if (rc != 0) {
2017 return rc;
2018 }
2019 }
2020
2021 return walk_memory_regions_end(&data, 0, 0);
2022 }
2023
2024 static int dump_region(void *priv, abi_ulong start,
2025 abi_ulong end, unsigned long prot)
2026 {
2027 FILE *f = (FILE *)priv;
2028
2029 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2030 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2031 start, end, end - start,
2032 ((prot & PAGE_READ) ? 'r' : '-'),
2033 ((prot & PAGE_WRITE) ? 'w' : '-'),
2034 ((prot & PAGE_EXEC) ? 'x' : '-'));
2035
2036 return 0;
2037 }
2038
2039 /* dump memory mappings */
2040 void page_dump(FILE *f)
2041 {
2042 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2043 "start", "end", "size", "prot");
2044 walk_memory_regions(f, dump_region);
2045 }
2046
2047 int page_get_flags(target_ulong address)
2048 {
2049 PageDesc *p;
2050
2051 p = page_find(address >> TARGET_PAGE_BITS);
2052 if (!p)
2053 return 0;
2054 return p->flags;
2055 }
2056
2057 /* Modify the flags of a page and invalidate the code if necessary.
2058 The flag PAGE_WRITE_ORG is set automatically depending
2059 on PAGE_WRITE. The mmap_lock should already be held. */
2060 void page_set_flags(target_ulong start, target_ulong end, int flags)
2061 {
2062 target_ulong addr, len;
2063
2064 /* This function should never be called with addresses outside the
2065 guest address space. If this assert fires, it probably indicates
2066 a missing call to h2g_valid. */
2067 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2068 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2069 #endif
2070 assert(start < end);
2071
2072 start = start & TARGET_PAGE_MASK;
2073 end = TARGET_PAGE_ALIGN(end);
2074
2075 if (flags & PAGE_WRITE) {
2076 flags |= PAGE_WRITE_ORG;
2077 }
2078
2079 for (addr = start, len = end - start;
2080 len != 0;
2081 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2082 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2083
2084 /* If the page is becoming writable but was write-protected and
2085 contains translated code, invalidate that code. */
2086 if (!(p->flags & PAGE_WRITE) &&
2087 (flags & PAGE_WRITE) &&
2088 p->first_tb) {
2089 tb_invalidate_phys_page(addr, 0, NULL);
2090 }
2091 p->flags = flags;
2092 }
2093 }
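/* Usage sketch (not part of the original source): how user-mode code that has
 * just mapped a guest range might record it with the helpers above. The
 * function and variable names are hypothetical; the flag values are the ones
 * used elsewhere in this file. Kept under #if 0 as illustration only. */
#if 0
static void example_track_guest_mapping(target_ulong guest_start,
                                        target_ulong guest_len)
{
    /* Record the range as valid, readable and writable; PAGE_WRITE_ORG is
     * added automatically by page_set_flags(). */
    page_set_flags(guest_start, guest_start + guest_len,
                   PAGE_VALID | PAGE_READ | PAGE_WRITE);

    /* Later permission checks on the range then succeed (return 0). */
    (void)page_check_range(guest_start, guest_len, PAGE_READ | PAGE_WRITE);
}
#endif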
2094
2095 int page_check_range(target_ulong start, target_ulong len, int flags)
2096 {
2097 PageDesc *p;
2098 target_ulong end;
2099 target_ulong addr;
2100
2101 /* This function should never be called with addresses outside the
2102 guest address space. If this assert fires, it probably indicates
2103 a missing call to h2g_valid. */
2104 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2105 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2106 #endif
2107
2108 if (len == 0) {
2109 return 0;
2110 }
2111 if (start + len - 1 < start) {
2112 /* We've wrapped around. */
2113 return -1;
2114 }
2115
2116 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2117 start = start & TARGET_PAGE_MASK;
2118
2119 for (addr = start, len = end - start;
2120 len != 0;
2121 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2122 p = page_find(addr >> TARGET_PAGE_BITS);
2123 if (!p)
2124 return -1;
2125 if (!(p->flags & PAGE_VALID))
2126 return -1;
2127
2128 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2129 return -1;
2130 if (flags & PAGE_WRITE) {
2131 if (!(p->flags & PAGE_WRITE_ORG))
2132 return -1;
2133 /* unprotect the page if it was put read-only because it
2134 contains translated code */
2135 if (!(p->flags & PAGE_WRITE)) {
2136 if (!page_unprotect(addr, 0, NULL))
2137 return -1;
2138 }
2139 return 0;
2140 }
2141 }
2142 return 0;
2143 }
2144
2145 /* called from signal handler: invalidate the code and unprotect the
2146 page. Return TRUE if the fault was successfully handled. */
2147 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2148 {
2149 unsigned int prot;
2150 PageDesc *p;
2151 target_ulong host_start, host_end, addr;
2152
2153 /* Technically this isn't safe inside a signal handler. However we
2154 know this only ever happens in a synchronous SEGV handler, so in
2155 practice it seems to be ok. */
2156 mmap_lock();
2157
2158 p = page_find(address >> TARGET_PAGE_BITS);
2159 if (!p) {
2160 mmap_unlock();
2161 return 0;
2162 }
2163
2164 /* if the page was really writable, then we change its
2165 protection back to writable */
2166 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2167 host_start = address & qemu_host_page_mask;
2168 host_end = host_start + qemu_host_page_size;
2169
2170 prot = 0;
2171 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2172 p = page_find(addr >> TARGET_PAGE_BITS);
2173 p->flags |= PAGE_WRITE;
2174 prot |= p->flags;
2175
2176 /* and since the content will be modified, we must invalidate
2177 the corresponding translated code. */
2178 tb_invalidate_phys_page(addr, pc, puc);
2179 #ifdef DEBUG_TB_CHECK
2180 tb_invalidate_check(addr);
2181 #endif
2182 }
2183 mprotect((void *)g2h(host_start), qemu_host_page_size,
2184 prot & PAGE_BITS);
2185
2186 mmap_unlock();
2187 return 1;
2188 }
2189 mmap_unlock();
2190 return 0;
2191 }
2192 #endif /* defined(CONFIG_USER_ONLY) */
2193
2194 #if !defined(CONFIG_USER_ONLY)
2195
2196 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2197 typedef struct subpage_t {
2198 MemoryRegion iomem;
2199 hwaddr base;
2200 uint16_t sub_section[TARGET_PAGE_SIZE];
2201 } subpage_t;
2202
2203 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2204 uint16_t section);
2205 static subpage_t *subpage_init(hwaddr base);
2206 static void destroy_page_desc(uint16_t section_index)
2207 {
2208 MemoryRegionSection *section = &phys_sections[section_index];
2209 MemoryRegion *mr = section->mr;
2210
2211 if (mr->subpage) {
2212 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2213 memory_region_destroy(&subpage->iomem);
2214 g_free(subpage);
2215 }
2216 }
2217
2218 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2219 {
2220 unsigned i;
2221 PhysPageEntry *p;
2222
2223 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2224 return;
2225 }
2226
2227 p = phys_map_nodes[lp->ptr];
2228 for (i = 0; i < L2_SIZE; ++i) {
2229 if (!p[i].is_leaf) {
2230 destroy_l2_mapping(&p[i], level - 1);
2231 } else {
2232 destroy_page_desc(p[i].ptr);
2233 }
2234 }
2235 lp->is_leaf = 0;
2236 lp->ptr = PHYS_MAP_NODE_NIL;
2237 }
2238
2239 static void destroy_all_mappings(AddressSpaceDispatch *d)
2240 {
2241 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2242 phys_map_nodes_reset();
2243 }
2244
2245 static uint16_t phys_section_add(MemoryRegionSection *section)
2246 {
2247 if (phys_sections_nb == phys_sections_nb_alloc) {
2248 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2249 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2250 phys_sections_nb_alloc);
2251 }
2252 phys_sections[phys_sections_nb] = *section;
2253 return phys_sections_nb++;
2254 }
2255
2256 static void phys_sections_clear(void)
2257 {
2258 phys_sections_nb = 0;
2259 }
2260
2261 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2262 {
2263 subpage_t *subpage;
2264 hwaddr base = section->offset_within_address_space
2265 & TARGET_PAGE_MASK;
2266 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2267 MemoryRegionSection subsection = {
2268 .offset_within_address_space = base,
2269 .size = TARGET_PAGE_SIZE,
2270 };
2271 hwaddr start, end;
2272
2273 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2274
2275 if (!(existing->mr->subpage)) {
2276 subpage = subpage_init(base);
2277 subsection.mr = &subpage->iomem;
2278 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2279 phys_section_add(&subsection));
2280 } else {
2281 subpage = container_of(existing->mr, subpage_t, iomem);
2282 }
2283 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2284 end = start + section->size - 1;
2285 subpage_register(subpage, start, end, phys_section_add(section));
2286 }
2287
2288
2289 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2290 {
2291 hwaddr start_addr = section->offset_within_address_space;
2292 ram_addr_t size = section->size;
2293 hwaddr addr;
2294 uint16_t section_index = phys_section_add(section);
2295
2296 assert(size);
2297
2298 addr = start_addr;
2299 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2300 section_index);
2301 }
2302
2303 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2304 {
2305 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2306 MemoryRegionSection now = *section, remain = *section;
2307
2308 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2309 || (now.size < TARGET_PAGE_SIZE)) {
2310 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2311 - now.offset_within_address_space,
2312 now.size);
2313 register_subpage(d, &now);
2314 remain.size -= now.size;
2315 remain.offset_within_address_space += now.size;
2316 remain.offset_within_region += now.size;
2317 }
2318 while (remain.size >= TARGET_PAGE_SIZE) {
2319 now = remain;
2320 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2321 now.size = TARGET_PAGE_SIZE;
2322 register_subpage(d, &now);
2323 } else {
2324 now.size &= TARGET_PAGE_MASK;
2325 register_multipage(d, &now);
2326 }
2327 remain.size -= now.size;
2328 remain.offset_within_address_space += now.size;
2329 remain.offset_within_region += now.size;
2330 }
2331 now = remain;
2332 if (now.size) {
2333 register_subpage(d, &now);
2334 }
2335 }
2336
2337 void qemu_flush_coalesced_mmio_buffer(void)
2338 {
2339 if (kvm_enabled())
2340 kvm_flush_coalesced_mmio_buffer();
2341 }
2342
2343 #if defined(__linux__) && !defined(TARGET_S390X)
2344
2345 #include <sys/vfs.h>
2346
2347 #define HUGETLBFS_MAGIC 0x958458f6
2348
2349 static long gethugepagesize(const char *path)
2350 {
2351 struct statfs fs;
2352 int ret;
2353
2354 do {
2355 ret = statfs(path, &fs);
2356 } while (ret != 0 && errno == EINTR);
2357
2358 if (ret != 0) {
2359 perror(path);
2360 return 0;
2361 }
2362
2363 if (fs.f_type != HUGETLBFS_MAGIC)
2364 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2365
2366 return fs.f_bsize;
2367 }
2368
2369 static void *file_ram_alloc(RAMBlock *block,
2370 ram_addr_t memory,
2371 const char *path)
2372 {
2373 char *filename;
2374 void *area;
2375 int fd;
2376 #ifdef MAP_POPULATE
2377 int flags;
2378 #endif
2379 unsigned long hpagesize;
2380
2381 hpagesize = gethugepagesize(path);
2382 if (!hpagesize) {
2383 return NULL;
2384 }
2385
2386 if (memory < hpagesize) {
2387 return NULL;
2388 }
2389
2390 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2391 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2392 return NULL;
2393 }
2394
2395 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2396 return NULL;
2397 }
2398
2399 fd = mkstemp(filename);
2400 if (fd < 0) {
2401 perror("unable to create backing store for hugepages");
2402 free(filename);
2403 return NULL;
2404 }
2405 unlink(filename);
2406 free(filename);
2407
2408 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2409
2410 /*
2411 * ftruncate is not supported by hugetlbfs in older
2412 * hosts, so don't bother bailing out on errors.
2413 * If anything goes wrong with it under other filesystems,
2414 * mmap will fail.
2415 */
2416 if (ftruncate(fd, memory))
2417 perror("ftruncate");
2418
2419 #ifdef MAP_POPULATE
2420 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2421 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2422 * to sidestep this quirk.
2423 */
2424 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2425 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2426 #else
2427 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2428 #endif
2429 if (area == MAP_FAILED) {
2430 perror("file_ram_alloc: can't mmap RAM pages");
2431 close(fd);
2432 return NULL;
2433 }
2434 block->fd = fd;
2435 return area;
2436 }
2437 #endif
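/* Illustrative note (not from the original source): file_ram_alloc() above is
 * only reached when the user passes "-mem-path" pointing at a hugetlbfs mount,
 * along the lines of (assumed invocation and mount point):
 *
 *     qemu-system-x86_64 -m 4096 -mem-path /dev/hugepages ...
 *
 * If the hugepage-backed allocation fails, qemu_ram_alloc_from_ptr() below
 * falls back to qemu_vmalloc(). */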
2438
2439 static ram_addr_t find_ram_offset(ram_addr_t size)
2440 {
2441 RAMBlock *block, *next_block;
2442 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2443
2444 if (QLIST_EMPTY(&ram_list.blocks))
2445 return 0;
2446
2447 QLIST_FOREACH(block, &ram_list.blocks, next) {
2448 ram_addr_t end, next = RAM_ADDR_MAX;
2449
2450 end = block->offset + block->length;
2451
2452 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2453 if (next_block->offset >= end) {
2454 next = MIN(next, next_block->offset);
2455 }
2456 }
2457 if (next - end >= size && next - end < mingap) {
2458 offset = end;
2459 mingap = next - end;
2460 }
2461 }
2462
2463 if (offset == RAM_ADDR_MAX) {
2464 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2465 (uint64_t)size);
2466 abort();
2467 }
2468
2469 return offset;
2470 }
2471
2472 ram_addr_t last_ram_offset(void)
2473 {
2474 RAMBlock *block;
2475 ram_addr_t last = 0;
2476
2477 QLIST_FOREACH(block, &ram_list.blocks, next)
2478 last = MAX(last, block->offset + block->length);
2479
2480 return last;
2481 }
2482
2483 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2484 {
2485 int ret;
2486 QemuOpts *machine_opts;
2487
2488 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
2489 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2490 if (machine_opts &&
2491 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2492 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2493 if (ret) {
2494 perror("qemu_madvise");
2495 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2496 "but dump_guest_core=off specified\n");
2497 }
2498 }
2499 }
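/* Illustrative note (not from the original source): the option checked above
 * comes from the -machine option group, e.g. (assumed command line):
 *
 *     qemu-system-x86_64 -machine pc,dump-guest-core=off ...
 *
 * With dump-guest-core=off, guest RAM is madvise()d with QEMU_MADV_DONTDUMP
 * so host core dumps do not include it. */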
2500
2501 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2502 {
2503 RAMBlock *new_block, *block;
2504
2505 new_block = NULL;
2506 QLIST_FOREACH(block, &ram_list.blocks, next) {
2507 if (block->offset == addr) {
2508 new_block = block;
2509 break;
2510 }
2511 }
2512 assert(new_block);
2513 assert(!new_block->idstr[0]);
2514
2515 if (dev) {
2516 char *id = qdev_get_dev_path(dev);
2517 if (id) {
2518 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2519 g_free(id);
2520 }
2521 }
2522 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2523
2524 QLIST_FOREACH(block, &ram_list.blocks, next) {
2525 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2526 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2527 new_block->idstr);
2528 abort();
2529 }
2530 }
2531 }
2532
2533 static int memory_try_enable_merging(void *addr, size_t len)
2534 {
2535 QemuOpts *opts;
2536
2537 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2538 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2539 /* disabled by the user */
2540 return 0;
2541 }
2542
2543 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2544 }
2545
2546 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2547 MemoryRegion *mr)
2548 {
2549 RAMBlock *new_block;
2550
2551 size = TARGET_PAGE_ALIGN(size);
2552 new_block = g_malloc0(sizeof(*new_block));
2553
2554 new_block->mr = mr;
2555 new_block->offset = find_ram_offset(size);
2556 if (host) {
2557 new_block->host = host;
2558 new_block->flags |= RAM_PREALLOC_MASK;
2559 } else {
2560 if (mem_path) {
2561 #if defined (__linux__) && !defined(TARGET_S390X)
2562 new_block->host = file_ram_alloc(new_block, size, mem_path);
2563 if (!new_block->host) {
2564 new_block->host = qemu_vmalloc(size);
2565 memory_try_enable_merging(new_block->host, size);
2566 }
2567 #else
2568 fprintf(stderr, "-mem-path option unsupported\n");
2569 exit(1);
2570 #endif
2571 } else {
2572 if (xen_enabled()) {
2573 xen_ram_alloc(new_block->offset, size, mr);
2574 } else if (kvm_enabled()) {
2575 /* some s390/kvm configurations have special constraints */
2576 new_block->host = kvm_vmalloc(size);
2577 } else {
2578 new_block->host = qemu_vmalloc(size);
2579 }
2580 memory_try_enable_merging(new_block->host, size);
2581 }
2582 }
2583 new_block->length = size;
2584
2585 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2586
2587 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2588 last_ram_offset() >> TARGET_PAGE_BITS);
2589 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2590 0, size >> TARGET_PAGE_BITS);
2591 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2592
2593 qemu_ram_setup_dump(new_block->host, size);
2594 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2595
2596 if (kvm_enabled())
2597 kvm_setup_guest_memory(new_block->host, size);
2598
2599 return new_block->offset;
2600 }
2601
2602 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2603 {
2604 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2605 }
2606
2607 void qemu_ram_free_from_ptr(ram_addr_t addr)
2608 {
2609 RAMBlock *block;
2610
2611 QLIST_FOREACH(block, &ram_list.blocks, next) {
2612 if (addr == block->offset) {
2613 QLIST_REMOVE(block, next);
2614 g_free(block);
2615 return;
2616 }
2617 }
2618 }
2619
2620 void qemu_ram_free(ram_addr_t addr)
2621 {
2622 RAMBlock *block;
2623
2624 QLIST_FOREACH(block, &ram_list.blocks, next) {
2625 if (addr == block->offset) {
2626 QLIST_REMOVE(block, next);
2627 if (block->flags & RAM_PREALLOC_MASK) {
2628 ;
2629 } else if (mem_path) {
2630 #if defined (__linux__) && !defined(TARGET_S390X)
2631 if (block->fd) {
2632 munmap(block->host, block->length);
2633 close(block->fd);
2634 } else {
2635 qemu_vfree(block->host);
2636 }
2637 #else
2638 abort();
2639 #endif
2640 } else {
2641 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2642 munmap(block->host, block->length);
2643 #else
2644 if (xen_enabled()) {
2645 xen_invalidate_map_cache_entry(block->host);
2646 } else {
2647 qemu_vfree(block->host);
2648 }
2649 #endif
2650 }
2651 g_free(block);
2652 return;
2653 }
2654 }
2655
2656 }
2657
2658 #ifndef _WIN32
2659 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2660 {
2661 RAMBlock *block;
2662 ram_addr_t offset;
2663 int flags;
2664 void *area, *vaddr;
2665
2666 QLIST_FOREACH(block, &ram_list.blocks, next) {
2667 offset = addr - block->offset;
2668 if (offset < block->length) {
2669 vaddr = block->host + offset;
2670 if (block->flags & RAM_PREALLOC_MASK) {
2671 ;
2672 } else {
2673 flags = MAP_FIXED;
2674 munmap(vaddr, length);
2675 if (mem_path) {
2676 #if defined(__linux__) && !defined(TARGET_S390X)
2677 if (block->fd) {
2678 #ifdef MAP_POPULATE
2679 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2680 MAP_PRIVATE;
2681 #else
2682 flags |= MAP_PRIVATE;
2683 #endif
2684 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2685 flags, block->fd, offset);
2686 } else {
2687 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2688 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2689 flags, -1, 0);
2690 }
2691 #else
2692 abort();
2693 #endif
2694 } else {
2695 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2696 flags |= MAP_SHARED | MAP_ANONYMOUS;
2697 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2698 flags, -1, 0);
2699 #else
2700 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2701 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2702 flags, -1, 0);
2703 #endif
2704 }
2705 if (area != vaddr) {
2706 fprintf(stderr, "Could not remap addr: "
2707 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2708 length, addr);
2709 exit(1);
2710 }
2711 memory_try_enable_merging(vaddr, length);
2712 qemu_ram_setup_dump(vaddr, length);
2713 }
2714 return;
2715 }
2716 }
2717 }
2718 #endif /* !_WIN32 */
2719
2720 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2721 With the exception of the softmmu code in this file, this should
2722 only be used for local memory (e.g. video ram) that the device owns,
2723 and knows it isn't going to access beyond the end of the block.
2724
2725 It should not be used for general purpose DMA.
2726 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2727 */
2728 void *qemu_get_ram_ptr(ram_addr_t addr)
2729 {
2730 RAMBlock *block;
2731
2732 QLIST_FOREACH(block, &ram_list.blocks, next) {
2733 if (addr - block->offset < block->length) {
2734 /* Move this entry to the start of the list. */
2735 if (block != QLIST_FIRST(&ram_list.blocks)) {
2736 QLIST_REMOVE(block, next);
2737 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2738 }
2739 if (xen_enabled()) {
2740 /* We need to check if the requested address is in the RAM
2741 * because we don't want to map the entire memory in QEMU.
2742 * In that case just map until the end of the page.
2743 */
2744 if (block->offset == 0) {
2745 return xen_map_cache(addr, 0, 0);
2746 } else if (block->host == NULL) {
2747 block->host =
2748 xen_map_cache(block->offset, block->length, 1);
2749 }
2750 }
2751 return block->host + (addr - block->offset);
2752 }
2753 }
2754
2755 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2756 abort();
2757
2758 return NULL;
2759 }
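/* Usage sketch (not part of the original source): per the comment above,
 * qemu_get_ram_ptr() is meant for memory the device itself allocated and
 * owns, e.g. video RAM. The function name, region and size below are
 * hypothetical; kept under #if 0 as illustration only. */
#if 0
static uint8_t *example_alloc_and_map_vram(MemoryRegion *mr)
{
    const ram_addr_t vram_size = 8 * 1024 * 1024;   /* hypothetical 8 MB */

    /* Reserve a ram_addr_t range backed by host memory. */
    ram_addr_t vram_offset = qemu_ram_alloc(vram_size, mr);

    /* The owning device may keep a direct host pointer to its own block.
     * Guest-controlled DMA addresses must instead go through
     * cpu_physical_memory_rw()/cpu_physical_memory_map(). */
    return qemu_get_ram_ptr(vram_offset);
}
#endif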
2760
2761 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2762 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2763 */
2764 static void *qemu_safe_ram_ptr(ram_addr_t addr)
2765 {
2766 RAMBlock *block;
2767
2768 QLIST_FOREACH(block, &ram_list.blocks, next) {
2769 if (addr - block->offset < block->length) {
2770 if (xen_enabled()) {
2771 /* We need to check if the requested address is in the RAM
2772 * because we don't want to map the entire memory in QEMU.
2773 * In that case just map until the end of the page.
2774 */
2775 if (block->offset == 0) {
2776 return xen_map_cache(addr, 0, 0);
2777 } else if (block->host == NULL) {
2778 block->host =
2779 xen_map_cache(block->offset, block->length, 1);
2780 }
2781 }
2782 return block->host + (addr - block->offset);
2783 }
2784 }
2785
2786 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2787 abort();
2788
2789 return NULL;
2790 }
2791
2792 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2793 * but takes a size argument */
2794 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2795 {
2796 if (*size == 0) {
2797 return NULL;
2798 }
2799 if (xen_enabled()) {
2800 return xen_map_cache(addr, *size, 1);
2801 } else {
2802 RAMBlock *block;
2803
2804 QLIST_FOREACH(block, &ram_list.blocks, next) {
2805 if (addr - block->offset < block->length) {
2806 if (addr - block->offset + *size > block->length)
2807 *size = block->length - addr + block->offset;
2808 return block->host + (addr - block->offset);
2809 }
2810 }
2811
2812 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2813 abort();
2814 }
2815 }
2816
2817 void qemu_put_ram_ptr(void *addr)
2818 {
2819 trace_qemu_put_ram_ptr(addr);
2820 }
2821
2822 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2823 {
2824 RAMBlock *block;
2825 uint8_t *host = ptr;
2826
2827 if (xen_enabled()) {
2828 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2829 return 0;
2830 }
2831
2832 QLIST_FOREACH(block, &ram_list.blocks, next) {
2833 /* This case can happen when the block is not mapped. */
2834 if (block->host == NULL) {
2835 continue;
2836 }
2837 if (host - block->host < block->length) {
2838 *ram_addr = block->offset + (host - block->host);
2839 return 0;
2840 }
2841 }
2842
2843 return -1;
2844 }
2845
2846 /* Some of the softmmu routines need to translate from a host pointer
2847 (typically a TLB entry) back to a ram offset. */
2848 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2849 {
2850 ram_addr_t ram_addr;
2851
2852 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2853 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2854 abort();
2855 }
2856 return ram_addr;
2857 }
2858
2859 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2860 unsigned size)
2861 {
2862 #ifdef DEBUG_UNASSIGNED
2863 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2864 #endif
2865 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2866 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2867 #endif
2868 return 0;
2869 }
2870
2871 static void unassigned_mem_write(void *opaque, hwaddr addr,
2872 uint64_t val, unsigned size)
2873 {
2874 #ifdef DEBUG_UNASSIGNED
2875 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2876 #endif
2877 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2878 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2879 #endif
2880 }
2881
2882 static const MemoryRegionOps unassigned_mem_ops = {
2883 .read = unassigned_mem_read,
2884 .write = unassigned_mem_write,
2885 .endianness = DEVICE_NATIVE_ENDIAN,
2886 };
2887
2888 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2889 unsigned size)
2890 {
2891 abort();
2892 }
2893
2894 static void error_mem_write(void *opaque, hwaddr addr,
2895 uint64_t value, unsigned size)
2896 {
2897 abort();
2898 }
2899
2900 static const MemoryRegionOps error_mem_ops = {
2901 .read = error_mem_read,
2902 .write = error_mem_write,
2903 .endianness = DEVICE_NATIVE_ENDIAN,
2904 };
2905
2906 static const MemoryRegionOps rom_mem_ops = {
2907 .read = error_mem_read,
2908 .write = unassigned_mem_write,
2909 .endianness = DEVICE_NATIVE_ENDIAN,
2910 };
2911
2912 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2913 uint64_t val, unsigned size)
2914 {
2915 int dirty_flags;
2916 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2917 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2918 #if !defined(CONFIG_USER_ONLY)
2919 tb_invalidate_phys_page_fast(ram_addr, size);
2920 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2921 #endif
2922 }
2923 switch (size) {
2924 case 1:
2925 stb_p(qemu_get_ram_ptr(ram_addr), val);
2926 break;
2927 case 2:
2928 stw_p(qemu_get_ram_ptr(ram_addr), val);
2929 break;
2930 case 4:
2931 stl_p(qemu_get_ram_ptr(ram_addr), val);
2932 break;
2933 default:
2934 abort();
2935 }
2936 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2937 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2938 /* we remove the notdirty callback only if the code has been
2939 flushed */
2940 if (dirty_flags == 0xff)
2941 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2942 }
2943
2944 static const MemoryRegionOps notdirty_mem_ops = {
2945 .read = error_mem_read,
2946 .write = notdirty_mem_write,
2947 .endianness = DEVICE_NATIVE_ENDIAN,
2948 };
2949
2950 /* Generate a debug exception if a watchpoint has been hit. */
2951 static void check_watchpoint(int offset, int len_mask, int flags)
2952 {
2953 CPUArchState *env = cpu_single_env;
2954 target_ulong pc, cs_base;
2955 TranslationBlock *tb;
2956 target_ulong vaddr;
2957 CPUWatchpoint *wp;
2958 int cpu_flags;
2959
2960 if (env->watchpoint_hit) {
2961 /* We re-entered the check after replacing the TB. Now raise
2962 * the debug interrupt so that it will trigger after the
2963 * current instruction. */
2964 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2965 return;
2966 }
2967 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2968 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2969 if ((vaddr == (wp->vaddr & len_mask) ||
2970 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2971 wp->flags |= BP_WATCHPOINT_HIT;
2972 if (!env->watchpoint_hit) {
2973 env->watchpoint_hit = wp;
2974 tb = tb_find_pc(env->mem_io_pc);
2975 if (!tb) {
2976 cpu_abort(env, "check_watchpoint: could not find TB for "
2977 "pc=%p", (void *)env->mem_io_pc);
2978 }
2979 cpu_restore_state(tb, env, env->mem_io_pc);
2980 tb_phys_invalidate(tb, -1);
2981 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2982 env->exception_index = EXCP_DEBUG;
2983 cpu_loop_exit(env);
2984 } else {
2985 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2986 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2987 cpu_resume_from_signal(env, NULL);
2988 }
2989 }
2990 } else {
2991 wp->flags &= ~BP_WATCHPOINT_HIT;
2992 }
2993 }
2994 }
2995
2996 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2997 so these check for a hit then pass through to the normal out-of-line
2998 phys routines. */
2999 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
3000 unsigned size)
3001 {
3002 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3003 switch (size) {
3004 case 1: return ldub_phys(addr);
3005 case 2: return lduw_phys(addr);
3006 case 4: return ldl_phys(addr);
3007 default: abort();
3008 }
3009 }
3010
3011 static void watch_mem_write(void *opaque, hwaddr addr,
3012 uint64_t val, unsigned size)
3013 {
3014 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3015 switch (size) {
3016 case 1:
3017 stb_phys(addr, val);
3018 break;
3019 case 2:
3020 stw_phys(addr, val);
3021 break;
3022 case 4:
3023 stl_phys(addr, val);
3024 break;
3025 default: abort();
3026 }
3027 }
3028
3029 static const MemoryRegionOps watch_mem_ops = {
3030 .read = watch_mem_read,
3031 .write = watch_mem_write,
3032 .endianness = DEVICE_NATIVE_ENDIAN,
3033 };
3034
3035 static uint64_t subpage_read(void *opaque, hwaddr addr,
3036 unsigned len)
3037 {
3038 subpage_t *mmio = opaque;
3039 unsigned int idx = SUBPAGE_IDX(addr);
3040 MemoryRegionSection *section;
3041 #if defined(DEBUG_SUBPAGE)
3042 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3043 mmio, len, addr, idx);
3044 #endif
3045
3046 section = &phys_sections[mmio->sub_section[idx]];
3047 addr += mmio->base;
3048 addr -= section->offset_within_address_space;
3049 addr += section->offset_within_region;
3050 return io_mem_read(section->mr, addr, len);
3051 }
3052
3053 static void subpage_write(void *opaque, hwaddr addr,
3054 uint64_t value, unsigned len)
3055 {
3056 subpage_t *mmio = opaque;
3057 unsigned int idx = SUBPAGE_IDX(addr);
3058 MemoryRegionSection *section;
3059 #if defined(DEBUG_SUBPAGE)
3060 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3061 " idx %d value %"PRIx64"\n",
3062 __func__, mmio, len, addr, idx, value);
3063 #endif
3064
3065 section = &phys_sections[mmio->sub_section[idx]];
3066 addr += mmio->base;
3067 addr -= section->offset_within_address_space;
3068 addr += section->offset_within_region;
3069 io_mem_write(section->mr, addr, value, len);
3070 }
3071
3072 static const MemoryRegionOps subpage_ops = {
3073 .read = subpage_read,
3074 .write = subpage_write,
3075 .endianness = DEVICE_NATIVE_ENDIAN,
3076 };
3077
3078 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3079 unsigned size)
3080 {
3081 ram_addr_t raddr = addr;
3082 void *ptr = qemu_get_ram_ptr(raddr);
3083 switch (size) {
3084 case 1: return ldub_p(ptr);
3085 case 2: return lduw_p(ptr);
3086 case 4: return ldl_p(ptr);
3087 default: abort();
3088 }
3089 }
3090
3091 static void subpage_ram_write(void *opaque, hwaddr addr,
3092 uint64_t value, unsigned size)
3093 {
3094 ram_addr_t raddr = addr;
3095 void *ptr = qemu_get_ram_ptr(raddr);
3096 switch (size) {
3097 case 1: return stb_p(ptr, value);
3098 case 2: return stw_p(ptr, value);
3099 case 4: return stl_p(ptr, value);
3100 default: abort();
3101 }
3102 }
3103
3104 static const MemoryRegionOps subpage_ram_ops = {
3105 .read = subpage_ram_read,
3106 .write = subpage_ram_write,
3107 .endianness = DEVICE_NATIVE_ENDIAN,
3108 };
3109
3110 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3111 uint16_t section)
3112 {
3113 int idx, eidx;
3114
3115 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3116 return -1;
3117 idx = SUBPAGE_IDX(start);
3118 eidx = SUBPAGE_IDX(end);
3119 #if defined(DEBUG_SUBPAGE)
3120 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
3121 __func__, mmio, start, end, idx, eidx, section);
3122 #endif
3123 if (memory_region_is_ram(phys_sections[section].mr)) {
3124 MemoryRegionSection new_section = phys_sections[section];
3125 new_section.mr = &io_mem_subpage_ram;
3126 section = phys_section_add(&new_section);
3127 }
3128 for (; idx <= eidx; idx++) {
3129 mmio->sub_section[idx] = section;
3130 }
3131
3132 return 0;
3133 }
3134
3135 static subpage_t *subpage_init(hwaddr base)
3136 {
3137 subpage_t *mmio;
3138
3139 mmio = g_malloc0(sizeof(subpage_t));
3140
3141 mmio->base = base;
3142 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3143 "subpage", TARGET_PAGE_SIZE);
3144 mmio->iomem.subpage = true;
3145 #if defined(DEBUG_SUBPAGE)
3146 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
3147 mmio, base, TARGET_PAGE_SIZE);
3148 #endif
3149 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3150
3151 return mmio;
3152 }
3153
3154 static uint16_t dummy_section(MemoryRegion *mr)
3155 {
3156 MemoryRegionSection section = {
3157 .mr = mr,
3158 .offset_within_address_space = 0,
3159 .offset_within_region = 0,
3160 .size = UINT64_MAX,
3161 };
3162
3163 return phys_section_add(&section);
3164 }
3165
3166 MemoryRegion *iotlb_to_region(hwaddr index)
3167 {
3168 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3169 }
3170
3171 static void io_mem_init(void)
3172 {
3173 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3174 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3175 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3176 "unassigned", UINT64_MAX);
3177 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3178 "notdirty", UINT64_MAX);
3179 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3180 "subpage-ram", UINT64_MAX);
3181 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3182 "watch", UINT64_MAX);
3183 }
3184
3185 static void mem_begin(MemoryListener *listener)
3186 {
3187 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3188
3189 destroy_all_mappings(d);
3190 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3191 }
3192
3193 static void core_begin(MemoryListener *listener)
3194 {
3195 phys_sections_clear();
3196 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3197 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3198 phys_section_rom = dummy_section(&io_mem_rom);
3199 phys_section_watch = dummy_section(&io_mem_watch);
3200 }
3201
3202 static void tcg_commit(MemoryListener *listener)
3203 {
3204 CPUArchState *env;
3205
3206 /* since each CPU stores ram addresses in its TLB cache, we must
3207 reset the modified entries */
3208 /* XXX: slow ! */
3209 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3210 tlb_flush(env, 1);
3211 }
3212 }
3213
3214 static void core_log_global_start(MemoryListener *listener)
3215 {
3216 cpu_physical_memory_set_dirty_tracking(1);
3217 }
3218
3219 static void core_log_global_stop(MemoryListener *listener)
3220 {
3221 cpu_physical_memory_set_dirty_tracking(0);
3222 }
3223
3224 static void io_region_add(MemoryListener *listener,
3225 MemoryRegionSection *section)
3226 {
3227 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3228
3229 mrio->mr = section->mr;
3230 mrio->offset = section->offset_within_region;
3231 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3232 section->offset_within_address_space, section->size);
3233 ioport_register(&mrio->iorange);
3234 }
3235
3236 static void io_region_del(MemoryListener *listener,
3237 MemoryRegionSection *section)
3238 {
3239 isa_unassign_ioport(section->offset_within_address_space, section->size);
3240 }
3241
3242 static MemoryListener core_memory_listener = {
3243 .begin = core_begin,
3244 .log_global_start = core_log_global_start,
3245 .log_global_stop = core_log_global_stop,
3246 .priority = 1,
3247 };
3248
3249 static MemoryListener io_memory_listener = {
3250 .region_add = io_region_add,
3251 .region_del = io_region_del,
3252 .priority = 0,
3253 };
3254
3255 static MemoryListener tcg_memory_listener = {
3256 .commit = tcg_commit,
3257 };
3258
3259 void address_space_init_dispatch(AddressSpace *as)
3260 {
3261 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3262
3263 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3264 d->listener = (MemoryListener) {
3265 .begin = mem_begin,
3266 .region_add = mem_add,
3267 .region_nop = mem_add,
3268 .priority = 0,
3269 };
3270 as->dispatch = d;
3271 memory_listener_register(&d->listener, as);
3272 }
3273
3274 void address_space_destroy_dispatch(AddressSpace *as)
3275 {
3276 AddressSpaceDispatch *d = as->dispatch;
3277
3278 memory_listener_unregister(&d->listener);
3279 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3280 g_free(d);
3281 as->dispatch = NULL;
3282 }
3283
3284 static void memory_map_init(void)
3285 {
3286 system_memory = g_malloc(sizeof(*system_memory));
3287 memory_region_init(system_memory, "system", INT64_MAX);
3288 address_space_init(&address_space_memory, system_memory);
3289 address_space_memory.name = "memory";
3290
3291 system_io = g_malloc(sizeof(*system_io));
3292 memory_region_init(system_io, "io", 65536);
3293 address_space_init(&address_space_io, system_io);
3294 address_space_io.name = "I/O";
3295
3296 memory_listener_register(&core_memory_listener, &address_space_memory);
3297 memory_listener_register(&io_memory_listener, &address_space_io);
3298 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3299
3300 dma_context_init(&dma_context_memory, &address_space_memory,
3301 NULL, NULL, NULL);
3302 }
3303
3304 MemoryRegion *get_system_memory(void)
3305 {
3306 return system_memory;
3307 }
3308
3309 MemoryRegion *get_system_io(void)
3310 {
3311 return system_io;
3312 }
3313
3314 #endif /* !defined(CONFIG_USER_ONLY) */
3315
3316 /* physical memory access (slow version, mainly for debug) */
3317 #if defined(CONFIG_USER_ONLY)
3318 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3319 uint8_t *buf, int len, int is_write)
3320 {
3321 int l, flags;
3322 target_ulong page;
3323 void * p;
3324
3325 while (len > 0) {
3326 page = addr & TARGET_PAGE_MASK;
3327 l = (page + TARGET_PAGE_SIZE) - addr;
3328 if (l > len)
3329 l = len;
3330 flags = page_get_flags(page);
3331 if (!(flags & PAGE_VALID))
3332 return -1;
3333 if (is_write) {
3334 if (!(flags & PAGE_WRITE))
3335 return -1;
3336 /* XXX: this code should not depend on lock_user */
3337 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3338 return -1;
3339 memcpy(p, buf, l);
3340 unlock_user(p, addr, l);
3341 } else {
3342 if (!(flags & PAGE_READ))
3343 return -1;
3344 /* XXX: this code should not depend on lock_user */
3345 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3346 return -1;
3347 memcpy(buf, p, l);
3348 unlock_user(p, addr, 0);
3349 }
3350 len -= l;
3351 buf += l;
3352 addr += l;
3353 }
3354 return 0;
3355 }
3356
3357 #else
3358
3359 static void invalidate_and_set_dirty(hwaddr addr,
3360 hwaddr length)
3361 {
3362 if (!cpu_physical_memory_is_dirty(addr)) {
3363 /* invalidate code */
3364 tb_invalidate_phys_page_range(addr, addr + length, 0);
3365 /* set dirty bit */
3366 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3367 }
3368 xen_modified_memory(addr, length);
3369 }
3370
3371 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3372 int len, bool is_write)
3373 {
3374 AddressSpaceDispatch *d = as->dispatch;
3375 int l;
3376 uint8_t *ptr;
3377 uint32_t val;
3378 hwaddr page;
3379 MemoryRegionSection *section;
3380
3381 while (len > 0) {
3382 page = addr & TARGET_PAGE_MASK;
3383 l = (page + TARGET_PAGE_SIZE) - addr;
3384 if (l > len)
3385 l = len;
3386 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3387
3388 if (is_write) {
3389 if (!memory_region_is_ram(section->mr)) {
3390 hwaddr addr1;
3391 addr1 = memory_region_section_addr(section, addr);
3392 /* XXX: could force cpu_single_env to NULL to avoid
3393 potential bugs */
3394 if (l >= 4 && ((addr1 & 3) == 0)) {
3395 /* 32 bit write access */
3396 val = ldl_p(buf);
3397 io_mem_write(section->mr, addr1, val, 4);
3398 l = 4;
3399 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3400 /* 16 bit write access */
3401 val = lduw_p(buf);
3402 io_mem_write(section->mr, addr1, val, 2);
3403 l = 2;
3404 } else {
3405 /* 8 bit write access */
3406 val = ldub_p(buf);
3407 io_mem_write(section->mr, addr1, val, 1);
3408 l = 1;
3409 }
3410 } else if (!section->readonly) {
3411 ram_addr_t addr1;
3412 addr1 = memory_region_get_ram_addr(section->mr)
3413 + memory_region_section_addr(section, addr);
3414 /* RAM case */
3415 ptr = qemu_get_ram_ptr(addr1);
3416 memcpy(ptr, buf, l);
3417 invalidate_and_set_dirty(addr1, l);
3418 qemu_put_ram_ptr(ptr);
3419 }
3420 } else {
3421 if (!(memory_region_is_ram(section->mr) ||
3422 memory_region_is_romd(section->mr))) {
3423 hwaddr addr1;
3424 /* I/O case */
3425 addr1 = memory_region_section_addr(section, addr);
3426 if (l >= 4 && ((addr1 & 3) == 0)) {
3427 /* 32 bit read access */
3428 val = io_mem_read(section->mr, addr1, 4);
3429 stl_p(buf, val);
3430 l = 4;
3431 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3432 /* 16 bit read access */
3433 val = io_mem_read(section->mr, addr1, 2);
3434 stw_p(buf, val);
3435 l = 2;
3436 } else {
3437 /* 8 bit read access */
3438 val = io_mem_read(section->mr, addr1, 1);
3439 stb_p(buf, val);
3440 l = 1;
3441 }
3442 } else {
3443 /* RAM case */
3444 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3445 + memory_region_section_addr(section,
3446 addr));
3447 memcpy(buf, ptr, l);
3448 qemu_put_ram_ptr(ptr);
3449 }
3450 }
3451 len -= l;
3452 buf += l;
3453 addr += l;
3454 }
3455 }
3456
3457 void address_space_write(AddressSpace *as, hwaddr addr,
3458 const uint8_t *buf, int len)
3459 {
3460 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3461 }
3462
3463 /**
3464 * address_space_read: read from an address space.
3465 *
3466 * @as: #AddressSpace to be accessed
3467 * @addr: address within that address space
3468 * @buf: buffer with the data transferred
 * @len: length of the data transferred
3469 */
3470 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3471 {
3472 address_space_rw(as, addr, buf, len, false);
3473 }
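/* Usage sketch (not part of the original source): reading and writing guest
 * physical memory through the system address space with the helpers above.
 * The function name and addresses are hypothetical; kept under #if 0 as
 * illustration only. */
#if 0
static void example_copy_guest_word(hwaddr src, hwaddr dst)
{
    uint8_t buf[4];

    /* Bounce four bytes through a host buffer; RAM, ROM and MMIO regions
     * are all dispatched by address_space_rw() above. */
    address_space_read(&address_space_memory, src, buf, sizeof(buf));
    address_space_write(&address_space_memory, dst, buf, sizeof(buf));
}
#endif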
3474
3475
3476 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3477 int len, int is_write)
3478 {
3479 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
3480 }
3481
3482 /* used for ROM loading : can write in RAM and ROM */
3483 void cpu_physical_memory_write_rom(hwaddr addr,
3484 const uint8_t *buf, int len)
3485 {
3486 AddressSpaceDispatch *d = address_space_memory.dispatch;
3487 int l;
3488 uint8_t *ptr;
3489 hwaddr page;
3490 MemoryRegionSection *section;
3491
3492 while (len > 0) {
3493 page = addr & TARGET_PAGE_MASK;
3494 l = (page + TARGET_PAGE_SIZE) - addr;
3495 if (l > len)
3496 l = len;
3497 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3498
3499 if (!(memory_region_is_ram(section->mr) ||
3500 memory_region_is_romd(section->mr))) {
3501 /* do nothing */
3502 } else {
3503 unsigned long addr1;
3504 addr1 = memory_region_get_ram_addr(section->mr)
3505 + memory_region_section_addr(section, addr);
3506 /* ROM/RAM case */
3507 ptr = qemu_get_ram_ptr(addr1);
3508 memcpy(ptr, buf, l);
3509 invalidate_and_set_dirty(addr1, l);
3510 qemu_put_ram_ptr(ptr);
3511 }
3512 len -= l;
3513 buf += l;
3514 addr += l;
3515 }
3516 }
3517
3518 typedef struct {
3519 void *buffer;
3520 hwaddr addr;
3521 hwaddr len;
3522 } BounceBuffer;
3523
3524 static BounceBuffer bounce;
3525
3526 typedef struct MapClient {
3527 void *opaque;
3528 void (*callback)(void *opaque);
3529 QLIST_ENTRY(MapClient) link;
3530 } MapClient;
3531
3532 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3533 = QLIST_HEAD_INITIALIZER(map_client_list);
3534
3535 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3536 {
3537 MapClient *client = g_malloc(sizeof(*client));
3538
3539 client->opaque = opaque;
3540 client->callback = callback;
3541 QLIST_INSERT_HEAD(&map_client_list, client, link);
3542 return client;
3543 }
3544
3545 static void cpu_unregister_map_client(void *_client)
3546 {
3547 MapClient *client = (MapClient *)_client;
3548
3549 QLIST_REMOVE(client, link);
3550 g_free(client);
3551 }
3552
3553 static void cpu_notify_map_clients(void)
3554 {
3555 MapClient *client;
3556
3557 while (!QLIST_EMPTY(&map_client_list)) {
3558 client = QLIST_FIRST(&map_client_list);
3559 client->callback(client->opaque);
3560 cpu_unregister_map_client(client);
3561 }
3562 }
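/* Usage sketch (not part of the original source): how a caller can use the
 * map-client list above to retry once the bounce buffer is released. The
 * "example_*" names are hypothetical; kept under #if 0 as illustration only. */
#if 0
/* Hypothetical helper that restarts the device's stalled transfer (which
 * will call address_space_map() again). */
extern void example_device_kick(void *dev);

static void example_map_retry_cb(void *opaque)
{
    /* address_space_map() failed earlier because the single bounce buffer
     * was busy; cpu_notify_map_clients() calls us once it is free. */
    example_device_kick(opaque);
}

static void example_queue_retry(void *dev)
{
    /* Ask to be called back when retrying the map is likely to succeed. */
    cpu_register_map_client(dev, example_map_retry_cb);
}
#endif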
3563
3564 /* Map a physical memory region into a host virtual address.
3565 * May map a subset of the requested range, given by and returned in *plen.
3566 * May return NULL if resources needed to perform the mapping are exhausted.
3567 * Use only for reads OR writes - not for read-modify-write operations.
3568 * Use cpu_register_map_client() to know when retrying the map operation is
3569 * likely to succeed.
3570 */
3571 void *address_space_map(AddressSpace *as,
3572 hwaddr addr,
3573 hwaddr *plen,
3574 bool is_write)
3575 {
3576 AddressSpaceDispatch *d = as->dispatch;
3577 hwaddr len = *plen;
3578 hwaddr todo = 0;
3579 int l;
3580 hwaddr page;
3581 MemoryRegionSection *section;
3582 ram_addr_t raddr = RAM_ADDR_MAX;
3583 ram_addr_t rlen;
3584 void *ret;
3585
3586 while (len > 0) {
3587 page = addr & TARGET_PAGE_MASK;
3588 l = (page + TARGET_PAGE_SIZE) - addr;
3589 if (l > len)
3590 l = len;
3591 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3592
3593 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3594 if (todo || bounce.buffer) {
3595 break;
3596 }
3597 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3598 bounce.addr = addr;
3599 bounce.len = l;
3600 if (!is_write) {
3601 address_space_read(as, addr, bounce.buffer, l);
3602 }
3603
3604 *plen = l;
3605 return bounce.buffer;
3606 }
3607 if (!todo) {
3608 raddr = memory_region_get_ram_addr(section->mr)
3609 + memory_region_section_addr(section, addr);
3610 }
3611
3612 len -= l;
3613 addr += l;
3614 todo += l;
3615 }
3616 rlen = todo;
3617 ret = qemu_ram_ptr_length(raddr, &rlen);
3618 *plen = rlen;
3619 return ret;
3620 }
3621
3622 /* Unmaps a memory region previously mapped by address_space_map().
3623 * Will also mark the memory as dirty if is_write == 1. access_len gives
3624 * the amount of memory that was actually read or written by the caller.
3625 */
3626 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3627 int is_write, hwaddr access_len)
3628 {
3629 if (buffer != bounce.buffer) {
3630 if (is_write) {
3631 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3632 while (access_len) {
3633 unsigned l;
3634 l = TARGET_PAGE_SIZE;
3635 if (l > access_len)
3636 l = access_len;
3637 invalidate_and_set_dirty(addr1, l);
3638 addr1 += l;
3639 access_len -= l;
3640 }
3641 }
3642 if (xen_enabled()) {
3643 xen_invalidate_map_cache_entry(buffer);
3644 }
3645 return;
3646 }
3647 if (is_write) {
3648 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3649 }
3650 qemu_vfree(bounce.buffer);
3651 bounce.buffer = NULL;
3652 cpu_notify_map_clients();
3653 }
3654
3655 void *cpu_physical_memory_map(hwaddr addr,
3656 hwaddr *plen,
3657 int is_write)
3658 {
3659 return address_space_map(&address_space_memory, addr, plen, is_write);
3660 }
3661
3662 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3663 int is_write, hwaddr access_len)
3664 {
3665 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3666 }
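/* Usage sketch (not part of the original source): the map/unmap pair above is
 * the zero-copy DMA path. A device would typically do something like the
 * following (names hypothetical); note that *plen may come back smaller than
 * requested, and that NULL means "retry later via cpu_register_map_client()".
 * Kept under #if 0 as illustration only. */
#if 0
static void example_dma_write(AddressSpace *as, hwaddr dma_addr,
                              hwaddr dma_len, const uint8_t *data)
{
    hwaddr plen = dma_len;
    void *host = address_space_map(as, dma_addr, &plen, true /* is_write */);

    if (!host) {
        return;                  /* resources exhausted: caller should retry */
    }
    memcpy(host, data, plen);    /* plen may be smaller than dma_len */
    address_space_unmap(as, host, plen, true, plen);
}
#endif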
3667
3668 /* warning: addr must be aligned */
3669 static inline uint32_t ldl_phys_internal(hwaddr addr,
3670 enum device_endian endian)
3671 {
3672 uint8_t *ptr;
3673 uint32_t val;
3674 MemoryRegionSection *section;
3675
3676 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3677
3678 if (!(memory_region_is_ram(section->mr) ||
3679 memory_region_is_romd(section->mr))) {
3680 /* I/O case */
3681 addr = memory_region_section_addr(section, addr);
3682 val = io_mem_read(section->mr, addr, 4);
3683 #if defined(TARGET_WORDS_BIGENDIAN)
3684 if (endian == DEVICE_LITTLE_ENDIAN) {
3685 val = bswap32(val);
3686 }
3687 #else
3688 if (endian == DEVICE_BIG_ENDIAN) {
3689 val = bswap32(val);
3690 }
3691 #endif
3692 } else {
3693 /* RAM case */
3694 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3695 & TARGET_PAGE_MASK)
3696 + memory_region_section_addr(section, addr));
3697 switch (endian) {
3698 case DEVICE_LITTLE_ENDIAN:
3699 val = ldl_le_p(ptr);
3700 break;
3701 case DEVICE_BIG_ENDIAN:
3702 val = ldl_be_p(ptr);
3703 break;
3704 default:
3705 val = ldl_p(ptr);
3706 break;
3707 }
3708 }
3709 return val;
3710 }
3711
3712 uint32_t ldl_phys(hwaddr addr)
3713 {
3714 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3715 }
3716
3717 uint32_t ldl_le_phys(hwaddr addr)
3718 {
3719 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3720 }
3721
3722 uint32_t ldl_be_phys(hwaddr addr)
3723 {
3724 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3725 }
3726
3727 /* warning: addr must be aligned */
3728 static inline uint64_t ldq_phys_internal(hwaddr addr,
3729 enum device_endian endian)
3730 {
3731 uint8_t *ptr;
3732 uint64_t val;
3733 MemoryRegionSection *section;
3734
3735 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3736
3737 if (!(memory_region_is_ram(section->mr) ||
3738 memory_region_is_romd(section->mr))) {
3739 /* I/O case */
3740 addr = memory_region_section_addr(section, addr);
3741
3742 /* XXX This is broken when device endian != cpu endian.
3743 Fix and add "endian" variable check */
3744 #ifdef TARGET_WORDS_BIGENDIAN
3745 val = io_mem_read(section->mr, addr, 4) << 32;
3746 val |= io_mem_read(section->mr, addr + 4, 4);
3747 #else
3748 val = io_mem_read(section->mr, addr, 4);
3749 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3750 #endif
3751 } else {
3752 /* RAM case */
3753 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3754 & TARGET_PAGE_MASK)
3755 + memory_region_section_addr(section, addr));
3756 switch (endian) {
3757 case DEVICE_LITTLE_ENDIAN:
3758 val = ldq_le_p(ptr);
3759 break;
3760 case DEVICE_BIG_ENDIAN:
3761 val = ldq_be_p(ptr);
3762 break;
3763 default:
3764 val = ldq_p(ptr);
3765 break;
3766 }
3767 }
3768 return val;
3769 }
3770
3771 uint64_t ldq_phys(hwaddr addr)
3772 {
3773 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3774 }
3775
3776 uint64_t ldq_le_phys(hwaddr addr)
3777 {
3778 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3779 }
3780
3781 uint64_t ldq_be_phys(hwaddr addr)
3782 {
3783 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3784 }
3785
3786 /* XXX: optimize */
3787 uint32_t ldub_phys(hwaddr addr)
3788 {
3789 uint8_t val;
3790 cpu_physical_memory_read(addr, &val, 1);
3791 return val;
3792 }
3793
3794 /* warning: addr must be aligned */
3795 static inline uint32_t lduw_phys_internal(hwaddr addr,
3796 enum device_endian endian)
3797 {
3798 uint8_t *ptr;
3799 uint64_t val;
3800 MemoryRegionSection *section;
3801
3802 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3803
3804 if (!(memory_region_is_ram(section->mr) ||
3805 memory_region_is_romd(section->mr))) {
3806 /* I/O case */
3807 addr = memory_region_section_addr(section, addr);
3808 val = io_mem_read(section->mr, addr, 2);
3809 #if defined(TARGET_WORDS_BIGENDIAN)
3810 if (endian == DEVICE_LITTLE_ENDIAN) {
3811 val = bswap16(val);
3812 }
3813 #else
3814 if (endian == DEVICE_BIG_ENDIAN) {
3815 val = bswap16(val);
3816 }
3817 #endif
3818 } else {
3819 /* RAM case */
3820 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3821 & TARGET_PAGE_MASK)
3822 + memory_region_section_addr(section, addr));
3823 switch (endian) {
3824 case DEVICE_LITTLE_ENDIAN:
3825 val = lduw_le_p(ptr);
3826 break;
3827 case DEVICE_BIG_ENDIAN:
3828 val = lduw_be_p(ptr);
3829 break;
3830 default:
3831 val = lduw_p(ptr);
3832 break;
3833 }
3834 }
3835 return val;
3836 }
3837
3838 uint32_t lduw_phys(hwaddr addr)
3839 {
3840 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3841 }
3842
3843 uint32_t lduw_le_phys(hwaddr addr)
3844 {
3845 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3846 }
3847
3848 uint32_t lduw_be_phys(hwaddr addr)
3849 {
3850 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3851 }
3852
3853 /* warning: addr must be aligned. The ram page is not marked as dirty
3854 and the code inside is not invalidated. It is useful if the dirty
3855 bits are used to track modified PTEs */
3856 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3857 {
3858 uint8_t *ptr;
3859 MemoryRegionSection *section;
3860
3861 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3862
3863 if (!memory_region_is_ram(section->mr) || section->readonly) {
3864 addr = memory_region_section_addr(section, addr);
3865 if (memory_region_is_ram(section->mr)) {
3866 section = &phys_sections[phys_section_rom];
3867 }
3868 io_mem_write(section->mr, addr, val, 4);
3869 } else {
3870 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3871 & TARGET_PAGE_MASK)
3872 + memory_region_section_addr(section, addr);
3873 ptr = qemu_get_ram_ptr(addr1);
3874 stl_p(ptr, val);
3875
3876 if (unlikely(in_migration)) {
3877 if (!cpu_physical_memory_is_dirty(addr1)) {
3878 /* invalidate code */
3879 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3880 /* set dirty bit */
3881 cpu_physical_memory_set_dirty_flags(
3882 addr1, (0xff & ~CODE_DIRTY_FLAG));
3883 }
3884 }
3885 }
3886 }
3887
3888 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3889 {
3890 uint8_t *ptr;
3891 MemoryRegionSection *section;
3892
3893 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3894
3895 if (!memory_region_is_ram(section->mr) || section->readonly) {
3896 addr = memory_region_section_addr(section, addr);
3897 if (memory_region_is_ram(section->mr)) {
3898 section = &phys_sections[phys_section_rom];
3899 }
3900 #ifdef TARGET_WORDS_BIGENDIAN
3901 io_mem_write(section->mr, addr, val >> 32, 4);
3902 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3903 #else
3904 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3905 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3906 #endif
3907 } else {
3908 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3909 & TARGET_PAGE_MASK)
3910 + memory_region_section_addr(section, addr));
3911 stq_p(ptr, val);
3912 }
3913 }
3914
3915 /* warning: addr must be aligned */
3916 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3917 enum device_endian endian)
3918 {
3919 uint8_t *ptr;
3920 MemoryRegionSection *section;
3921
3922 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3923
3924 if (!memory_region_is_ram(section->mr) || section->readonly) {
3925 addr = memory_region_section_addr(section, addr);
3926 if (memory_region_is_ram(section->mr)) {
3927 section = &phys_sections[phys_section_rom];
3928 }
3929 #if defined(TARGET_WORDS_BIGENDIAN)
3930 if (endian == DEVICE_LITTLE_ENDIAN) {
3931 val = bswap32(val);
3932 }
3933 #else
3934 if (endian == DEVICE_BIG_ENDIAN) {
3935 val = bswap32(val);
3936 }
3937 #endif
3938 io_mem_write(section->mr, addr, val, 4);
3939 } else {
3940 unsigned long addr1;
3941 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3942 + memory_region_section_addr(section, addr);
3943 /* RAM case */
3944 ptr = qemu_get_ram_ptr(addr1);
3945 switch (endian) {
3946 case DEVICE_LITTLE_ENDIAN:
3947 stl_le_p(ptr, val);
3948 break;
3949 case DEVICE_BIG_ENDIAN:
3950 stl_be_p(ptr, val);
3951 break;
3952 default:
3953 stl_p(ptr, val);
3954 break;
3955 }
3956 invalidate_and_set_dirty(addr1, 4);
3957 }
3958 }
3959
3960 void stl_phys(hwaddr addr, uint32_t val)
3961 {
3962 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3963 }
3964
3965 void stl_le_phys(hwaddr addr, uint32_t val)
3966 {
3967 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3968 }
3969
3970 void stl_be_phys(hwaddr addr, uint32_t val)
3971 {
3972 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3973 }
3974
3975 /* XXX: optimize */
3976 void stb_phys(hwaddr addr, uint32_t val)
3977 {
3978 uint8_t v = val;
3979 cpu_physical_memory_write(addr, &v, 1);
3980 }
3981
3982 /* warning: addr must be aligned */
3983 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3984 enum device_endian endian)
3985 {
3986 uint8_t *ptr;
3987 MemoryRegionSection *section;
3988
3989 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3990
3991 if (!memory_region_is_ram(section->mr) || section->readonly) {
3992 addr = memory_region_section_addr(section, addr);
3993 if (memory_region_is_ram(section->mr)) {
3994 section = &phys_sections[phys_section_rom];
3995 }
3996 #if defined(TARGET_WORDS_BIGENDIAN)
3997 if (endian == DEVICE_LITTLE_ENDIAN) {
3998 val = bswap16(val);
3999 }
4000 #else
4001 if (endian == DEVICE_BIG_ENDIAN) {
4002 val = bswap16(val);
4003 }
4004 #endif
4005 io_mem_write(section->mr, addr, val, 2);
4006 } else {
4007 unsigned long addr1;
4008 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4009 + memory_region_section_addr(section, addr);
4010 /* RAM case */
4011 ptr = qemu_get_ram_ptr(addr1);
4012 switch (endian) {
4013 case DEVICE_LITTLE_ENDIAN:
4014 stw_le_p(ptr, val);
4015 break;
4016 case DEVICE_BIG_ENDIAN:
4017 stw_be_p(ptr, val);
4018 break;
4019 default:
4020 stw_p(ptr, val);
4021 break;
4022 }
4023 invalidate_and_set_dirty(addr1, 2);
4024 }
4025 }
4026
4027 void stw_phys(hwaddr addr, uint32_t val)
4028 {
4029 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4030 }
4031
4032 void stw_le_phys(hwaddr addr, uint32_t val)
4033 {
4034 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4035 }
4036
4037 void stw_be_phys(hwaddr addr, uint32_t val)
4038 {
4039 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4040 }
4041
4042 /* XXX: optimize */
4043 void stq_phys(hwaddr addr, uint64_t val)
4044 {
4045 val = tswap64(val);
4046 cpu_physical_memory_write(addr, &val, 8);
4047 }
4048
4049 void stq_le_phys(hwaddr addr, uint64_t val)
4050 {
4051 val = cpu_to_le64(val);
4052 cpu_physical_memory_write(addr, &val, 8);
4053 }
4054
4055 void stq_be_phys(hwaddr addr, uint64_t val)
4056 {
4057 val = cpu_to_be64(val);
4058 cpu_physical_memory_write(addr, &val, 8);
4059 }
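/* The three 64-bit stores above convert the value to the desired guest byte
   order in a host buffer and then hand it to cpu_physical_memory_write() as
   a plain byte stream, rather than taking an aligned fast path like the
   32-bit helpers; hence the "XXX: optimize" note. */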
4060
4061 /* virtual memory access for debug (includes writing to ROM) */
4062 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4063 uint8_t *buf, int len, int is_write)
4064 {
4065 int l;
4066 hwaddr phys_addr;
4067 target_ulong page;
4068
4069 while (len > 0) {
4070 page = addr & TARGET_PAGE_MASK;
4071 phys_addr = cpu_get_phys_page_debug(env, page);
4072 /* if no physical page mapped, return an error */
4073 if (phys_addr == -1)
4074 return -1;
4075 l = (page + TARGET_PAGE_SIZE) - addr;
4076 if (l > len)
4077 l = len;
4078 phys_addr += (addr & ~TARGET_PAGE_MASK);
4079 if (is_write)
4080 cpu_physical_memory_write_rom(phys_addr, buf, l);
4081 else
4082 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4083 len -= l;
4084 buf += l;
4085 addr += l;
4086 }
4087 return 0;
4088 }
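/* A minimal sketch of a caller (a debugger stub, for instance) reading guest
   virtual memory through the helper above; debug_read_guest, vaddr and the
   error handling are hypothetical.  Writes go through
   cpu_physical_memory_write_rom(), so breakpoints can be patched even into
   ROM-backed regions.  Kept compiled out: */
#if 0
static int debug_read_guest(CPUArchState *env, target_ulong vaddr,
                            uint8_t *buf, int len)
{
    if (cpu_memory_rw_debug(env, vaddr, buf, len, 0) < 0) {
        return -1;      /* no physical page mapped at vaddr */
    }
    return 0;
}
#endif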
4089 #endif
4090
4091 /* In deterministic execution mode, an instruction that performs device I/O
4092    must be the last one in its TB, so that the instruction count stays exact. */
4093 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4094 {
4095 TranslationBlock *tb;
4096 uint32_t n, cflags;
4097 target_ulong pc, cs_base;
4098 uint64_t flags;
4099
4100 tb = tb_find_pc(retaddr);
4101 if (!tb) {
4102 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4103 (void *)retaddr);
4104 }
4105 n = env->icount_decr.u16.low + tb->icount;
4106 cpu_restore_state(tb, env, retaddr);
4107 /* Calculate how many instructions had been executed before the fault
4108 occurred. */
4109 n = n - env->icount_decr.u16.low;
4110 /* Generate a new TB ending on the I/O insn. */
4111 n++;
4112 /* On MIPS and SH, delay slot instructions can only be restarted if
4113 they were already the first instruction in the TB. If this is not
4114 the first instruction in a TB then re-execute the preceding
4115 branch. */
4116 #if defined(TARGET_MIPS)
4117 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4118 env->active_tc.PC -= 4;
4119 env->icount_decr.u16.low++;
4120 env->hflags &= ~MIPS_HFLAG_BMASK;
4121 }
4122 #elif defined(TARGET_SH4)
4123 if ((env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) != 0
4124 && n > 1) {
4125 env->pc -= 2;
4126 env->icount_decr.u16.low++;
4127 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4128 }
4129 #endif
4130 /* This should never happen. */
4131 if (n > CF_COUNT_MASK)
4132 cpu_abort(env, "TB too big during recompile");
4133
4134 cflags = n | CF_LAST_IO;
4135 pc = tb->pc;
4136 cs_base = tb->cs_base;
4137 flags = tb->flags;
4138 tb_phys_invalidate(tb, -1);
4139 /* FIXME: In theory this could raise an exception. In practice
4140 we have already translated the block once so it's probably ok. */
4141 tb_gen_code(env, pc, cs_base, flags, cflags);
4142 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4143 the first in the TB) then we end up generating a whole new TB and
4144 repeating the fault, which is horribly inefficient.
4145 Better would be to execute just this insn uncached, or generate a
4146 second new TB. */
4147 cpu_resume_from_signal(env, NULL);
4148 }
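/* Put differently: when instruction counting is active, an I/O access in the
   middle of a TB cannot simply be replayed, because the remaining instruction
   budget would no longer match.  The code above therefore regenerates the
   block with CF_LAST_IO and a count of exactly n instructions, so the I/O
   instruction terminates the new TB, and then restarts execution from it. */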
4149
4150 #if !defined(CONFIG_USER_ONLY)
4151
4152 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4153 {
4154 int i, target_code_size, max_target_code_size;
4155 int direct_jmp_count, direct_jmp2_count, cross_page;
4156 TranslationBlock *tb;
4157
4158 target_code_size = 0;
4159 max_target_code_size = 0;
4160 cross_page = 0;
4161 direct_jmp_count = 0;
4162 direct_jmp2_count = 0;
4163 for(i = 0; i < nb_tbs; i++) {
4164 tb = &tbs[i];
4165 target_code_size += tb->size;
4166 if (tb->size > max_target_code_size)
4167 max_target_code_size = tb->size;
4168 if (tb->page_addr[1] != -1)
4169 cross_page++;
4170 if (tb->tb_next_offset[0] != 0xffff) {
4171 direct_jmp_count++;
4172 if (tb->tb_next_offset[1] != 0xffff) {
4173 direct_jmp2_count++;
4174 }
4175 }
4176 }
4177 /* XXX: avoid using doubles? */
4178 cpu_fprintf(f, "Translation buffer state:\n");
4179 cpu_fprintf(f, "gen code size %td/%zd\n",
4180 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4181 cpu_fprintf(f, "TB count %d/%d\n",
4182 nb_tbs, code_gen_max_blocks);
4183 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4184 nb_tbs ? target_code_size / nb_tbs : 0,
4185 max_target_code_size);
4186 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4187 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4188 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4189 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4190 cross_page,
4191 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4192 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4193 direct_jmp_count,
4194 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4195 direct_jmp2_count,
4196 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4197 cpu_fprintf(f, "\nStatistics:\n");
4198 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4199 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4200 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4201 tcg_dump_info(f, cpu_fprintf);
4202 }
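/* The statistics above are typically surfaced through the monitor's
   "info jit" command; since the per-TB figures are computed over the current
   set of translation blocks, they effectively reset whenever the translation
   buffer is flushed. */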
4203
4204 /*
4205 * A helper function for the _utterly broken_ virtio device model to find out if
4206  * it's running on a big-endian machine. Don't do this at home, kids!
4207 */
4208 bool virtio_is_big_endian(void);
4209 bool virtio_is_big_endian(void)
4210 {
4211 #if defined(TARGET_WORDS_BIGENDIAN)
4212 return true;
4213 #else
4214 return false;
4215 #endif
4216 }
4217
4218 #endif
4219
4220 #ifndef CONFIG_USER_ONLY
4221 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4222 {
4223 MemoryRegionSection *section;
4224
4225 section = phys_page_find(address_space_memory.dispatch,
4226 phys_addr >> TARGET_PAGE_BITS);
4227
4228 return !(memory_region_is_ram(section->mr) ||
4229 memory_region_is_romd(section->mr));
4230 }
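/* A minimal sketch of a hypothetical caller (can_map_for_dma is a made-up
   name) that uses the predicate above to refuse device-backed physical
   addresses, for example when deciding whether a zero-copy mapping is safe.
   Kept compiled out: */
#if 0
static bool can_map_for_dma(hwaddr phys_addr)
{
    /* only RAM and ROMD regions have stable host-addressable backing */
    return !cpu_physical_memory_is_io(phys_addr);
}
#endif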
4231 #endif