1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
59
60 #include "cputlb.h"
61
62 #define WANT_EXEC_OBSOLETE
63 #include "exec-obsolete.h"
64
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_UNASSIGNED
68
69 /* make various TB consistency checks */
70 //#define DEBUG_TB_CHECK
71
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
74
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
79
80 #define SMC_BITMAP_USE_THRESHOLD 10
81
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88
89 uint8_t *code_gen_prologue;
90 static uint8_t *code_gen_buffer;
91 static size_t code_gen_buffer_size;
92 /* threshold to flush the translated code buffer */
93 static size_t code_gen_buffer_max_size;
94 static uint8_t *code_gen_ptr;
95
96 #if !defined(CONFIG_USER_ONLY)
97 int phys_ram_fd;
98 static int in_migration;
99
100 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
101
102 static MemoryRegion *system_memory;
103 static MemoryRegion *system_io;
104
105 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
106 static MemoryRegion io_mem_subpage_ram;
107
108 #endif
109
110 CPUArchState *first_cpu;
111 /* current CPU in the current thread. It is only valid inside
112 cpu_exec() */
113 DEFINE_TLS(CPUArchState *,cpu_single_env);
114 /* 0 = Do not count executed instructions.
115 1 = Precise instruction counting.
116 2 = Adaptive rate instruction counting. */
117 int use_icount = 0;
118
119 typedef struct PageDesc {
120 /* list of TBs intersecting this ram page */
121 TranslationBlock *first_tb;
122 /* in order to optimize self-modifying code, we count the number
123 of code write accesses to a page; past a threshold we use a bitmap */
124 unsigned int code_write_count;
125 uint8_t *code_bitmap;
126 #if defined(CONFIG_USER_ONLY)
127 unsigned long flags;
128 #endif
129 } PageDesc;
130
131 /* In system mode we want L1_MAP to be based on ram offsets,
132 while in user mode we want it to be based on virtual addresses. */
133 #if !defined(CONFIG_USER_ONLY)
134 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
135 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
136 #else
137 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
138 #endif
139 #else
140 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
141 #endif
142
143 /* Size of the L2 (and L3, etc) page tables. */
144 #define L2_BITS 10
145 #define L2_SIZE (1 << L2_BITS)
146
147 #define P_L2_LEVELS \
148 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
149
150 /* The bits remaining after N lower levels of page tables. */
151 #define V_L1_BITS_REM \
152 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
153
154 #if V_L1_BITS_REM < 4
155 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
156 #else
157 #define V_L1_BITS V_L1_BITS_REM
158 #endif
159
160 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
161
162 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
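/* Worked example (assuming a 64-bit host, TARGET_PAGE_BITS == 12 and
   L1_MAP_ADDR_SPACE_BITS == 64): 64 - 12 = 52 bits of page index,
   52 % 10 == 2 < 4, so V_L1_BITS = 12, V_L1_SIZE = 4096 and
   V_L1_SHIFT = 64 - 12 - 12 = 40, giving V_L1_SHIFT / L2_BITS = 4
   intermediate levels of 10 bits each below the L1 table. */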
163
164 uintptr_t qemu_real_host_page_size;
165 uintptr_t qemu_host_page_size;
166 uintptr_t qemu_host_page_mask;
167
168 /* This is a multi-level map on the virtual address space.
169 The bottom level has pointers to PageDesc. */
170 static void *l1_map[V_L1_SIZE];
171
172 #if !defined(CONFIG_USER_ONLY)
173 typedef struct PhysPageEntry PhysPageEntry;
174
175 static MemoryRegionSection *phys_sections;
176 static unsigned phys_sections_nb, phys_sections_nb_alloc;
177 static uint16_t phys_section_unassigned;
178 static uint16_t phys_section_notdirty;
179 static uint16_t phys_section_rom;
180 static uint16_t phys_section_watch;
181
182 struct PhysPageEntry {
183 uint16_t is_leaf : 1;
184 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
185 uint16_t ptr : 15;
186 };
187
188 /* Simple allocator for PhysPageEntry nodes */
189 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
190 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
191
192 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
193
194 /* This is a multi-level map on the physical address space.
195 The bottom level has pointers to MemoryRegionSections. */
196 static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
197
198 static void io_mem_init(void);
199 static void memory_map_init(void);
200
201 static MemoryRegion io_mem_watch;
202 #endif
203
204 /* statistics */
205 static int tb_flush_count;
206 static int tb_phys_invalidate_count;
207
208 #ifdef _WIN32
209 static inline void map_exec(void *addr, long size)
210 {
211 DWORD old_protect;
212 VirtualProtect(addr, size,
213 PAGE_EXECUTE_READWRITE, &old_protect);
214
215 }
216 #else
217 static inline void map_exec(void *addr, long size)
218 {
219 unsigned long start, end, page_size;
220
221 page_size = getpagesize();
222 start = (unsigned long)addr;
223 start &= ~(page_size - 1);
224
225 end = (unsigned long)addr + size;
226 end += page_size - 1;
227 end &= ~(page_size - 1);
228
229 mprotect((void *)start, end - start,
230 PROT_READ | PROT_WRITE | PROT_EXEC);
231 }
232 #endif
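/* e.g. with 4 KiB host pages, map_exec(addr = 0x1234, size = 0x100)
   rounds the range out to [0x1000, 0x2000) and mprotect()s that whole
   page as RWX; the Win32 variant relies on VirtualProtect rounding the
   affected region to page boundaries itself. */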
233
234 static void page_init(void)
235 {
236 /* NOTE: we can always suppose that qemu_host_page_size >=
237 TARGET_PAGE_SIZE */
238 #ifdef _WIN32
239 {
240 SYSTEM_INFO system_info;
241
242 GetSystemInfo(&system_info);
243 qemu_real_host_page_size = system_info.dwPageSize;
244 }
245 #else
246 qemu_real_host_page_size = getpagesize();
247 #endif
248 if (qemu_host_page_size == 0)
249 qemu_host_page_size = qemu_real_host_page_size;
250 if (qemu_host_page_size < TARGET_PAGE_SIZE)
251 qemu_host_page_size = TARGET_PAGE_SIZE;
252 qemu_host_page_mask = ~(qemu_host_page_size - 1);
253
254 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
255 {
256 #ifdef HAVE_KINFO_GETVMMAP
257 struct kinfo_vmentry *freep;
258 int i, cnt;
259
260 freep = kinfo_getvmmap(getpid(), &cnt);
261 if (freep) {
262 mmap_lock();
263 for (i = 0; i < cnt; i++) {
264 unsigned long startaddr, endaddr;
265
266 startaddr = freep[i].kve_start;
267 endaddr = freep[i].kve_end;
268 if (h2g_valid(startaddr)) {
269 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
270
271 if (h2g_valid(endaddr)) {
272 endaddr = h2g(endaddr);
273 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
274 } else {
275 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
276 endaddr = ~0ul;
277 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
278 #endif
279 }
280 }
281 }
282 free(freep);
283 mmap_unlock();
284 }
285 #else
286 FILE *f;
287
288 last_brk = (unsigned long)sbrk(0);
289
290 f = fopen("/compat/linux/proc/self/maps", "r");
291 if (f) {
292 mmap_lock();
293
294 do {
295 unsigned long startaddr, endaddr;
296 int n;
297
298 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
299
300 if (n == 2 && h2g_valid(startaddr)) {
301 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
302
303 if (h2g_valid(endaddr)) {
304 endaddr = h2g(endaddr);
305 } else {
306 endaddr = ~0ul;
307 }
308 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
309 }
310 } while (!feof(f));
311
312 fclose(f);
313 mmap_unlock();
314 }
315 #endif
316 }
317 #endif
318 }
319
320 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
321 {
322 PageDesc *pd;
323 void **lp;
324 int i;
325
326 #if defined(CONFIG_USER_ONLY)
327 /* We can't use g_malloc because it may recurse into a locked mutex. */
328 # define ALLOC(P, SIZE) \
329 do { \
330 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
331 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
332 } while (0)
333 #else
334 # define ALLOC(P, SIZE) \
335 do { P = g_malloc0(SIZE); } while (0)
336 #endif
337
338 /* Level 1. Always allocated. */
339 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
340
341 /* Level 2..N-1. */
342 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
343 void **p = *lp;
344
345 if (p == NULL) {
346 if (!alloc) {
347 return NULL;
348 }
349 ALLOC(p, sizeof(void *) * L2_SIZE);
350 *lp = p;
351 }
352
353 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
354 }
355
356 pd = *lp;
357 if (pd == NULL) {
358 if (!alloc) {
359 return NULL;
360 }
361 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
362 *lp = pd;
363 }
364
365 #undef ALLOC
366
367 return pd + (index & (L2_SIZE - 1));
368 }
369
370 static inline PageDesc *page_find(tb_page_addr_t index)
371 {
372 return page_find_alloc(index, 0);
373 }
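/* Sketch of a lookup through l1_map, using the 4-level example above,
   for a page index 'idx':

     l1_map[(idx >> 40) & (V_L1_SIZE - 1)]   -> level-3 node
       node[(idx >> 30) & (L2_SIZE - 1)]     -> level-2 node
       node[(idx >> 20) & (L2_SIZE - 1)]     -> level-1 node
       node[(idx >> 10) & (L2_SIZE - 1)]     -> PageDesc[L2_SIZE] leaf
       &leaf[idx & (L2_SIZE - 1)]            -> the PageDesc

   page_find() is the non-allocating variant: it returns NULL as soon
   as an intermediate table is missing. */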
374
375 #if !defined(CONFIG_USER_ONLY)
376
377 static void phys_map_node_reserve(unsigned nodes)
378 {
379 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
380 typedef PhysPageEntry Node[L2_SIZE];
381 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
382 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
383 phys_map_nodes_nb + nodes);
384 phys_map_nodes = g_renew(Node, phys_map_nodes,
385 phys_map_nodes_nb_alloc);
386 }
387 }
388
389 static uint16_t phys_map_node_alloc(void)
390 {
391 unsigned i;
392 uint16_t ret;
393
394 ret = phys_map_nodes_nb++;
395 assert(ret != PHYS_MAP_NODE_NIL);
396 assert(ret != phys_map_nodes_nb_alloc);
397 for (i = 0; i < L2_SIZE; ++i) {
398 phys_map_nodes[ret][i].is_leaf = 0;
399 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
400 }
401 return ret;
402 }
403
404 static void phys_map_nodes_reset(void)
405 {
406 phys_map_nodes_nb = 0;
407 }
408
409
410 static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
411 target_phys_addr_t *nb, uint16_t leaf,
412 int level)
413 {
414 PhysPageEntry *p;
415 int i;
416 target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
417
418 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
419 lp->ptr = phys_map_node_alloc();
420 p = phys_map_nodes[lp->ptr];
421 if (level == 0) {
422 for (i = 0; i < L2_SIZE; i++) {
423 p[i].is_leaf = 1;
424 p[i].ptr = phys_section_unassigned;
425 }
426 }
427 } else {
428 p = phys_map_nodes[lp->ptr];
429 }
430 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
431
432 while (*nb && lp < &p[L2_SIZE]) {
433 if ((*index & (step - 1)) == 0 && *nb >= step) {
434 lp->is_leaf = true;
435 lp->ptr = leaf;
436 *index += step;
437 *nb -= step;
438 } else {
439 phys_page_set_level(lp, index, nb, leaf, level - 1);
440 }
441 ++lp;
442 }
443 }
444
445 static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
446 uint16_t leaf)
447 {
448 /* Wildly overreserve - it doesn't matter much. */
449 phys_map_node_reserve(3 * P_L2_LEVELS);
450
451 phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
452 }
453
454 MemoryRegionSection *phys_page_find(target_phys_addr_t index)
455 {
456 PhysPageEntry lp = phys_map;
457 PhysPageEntry *p;
458 int i;
459 uint16_t s_index = phys_section_unassigned;
460
461 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
462 if (lp.ptr == PHYS_MAP_NODE_NIL) {
463 goto not_found;
464 }
465 p = phys_map_nodes[lp.ptr];
466 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
467 }
468
469 s_index = lp.ptr;
470 not_found:
471 return &phys_sections[s_index];
472 }
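/* Roughly, phys_map is the root of a radix tree with P_L2_LEVELS
   levels of L2_SIZE entries each.  Interior entries (is_leaf == 0)
   hold an index into phys_map_nodes; leaf entries hold an index into
   phys_sections.  phys_page_set_level() marks a whole step-aligned
   block with a single leaf when the range covers it and recurses one
   level deeper otherwise, while phys_page_find() mirrors the walk and
   falls back to phys_section_unassigned on PHYS_MAP_NODE_NIL. */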
473
474 bool memory_region_is_unassigned(MemoryRegion *mr)
475 {
476 return mr != &io_mem_ram && mr != &io_mem_rom
477 && mr != &io_mem_notdirty && !mr->rom_device
478 && mr != &io_mem_watch;
479 }
480
481 #define mmap_lock() do { } while(0)
482 #define mmap_unlock() do { } while(0)
483 #endif
484
485 #if defined(CONFIG_USER_ONLY)
486 /* Currently it is not recommended to allocate big chunks of data in
487 user mode. This will change when a dedicated libc is used. */
488 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
489 region in which the guest needs to run. Revisit this. */
490 #define USE_STATIC_CODE_GEN_BUFFER
491 #endif
492
493 /* ??? Should configure for this, not list operating systems here. */
494 #if (defined(__linux__) \
495 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
496 || defined(__DragonFly__) || defined(__OpenBSD__) \
497 || defined(__NetBSD__))
498 # define USE_MMAP
499 #endif
500
501 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
502 indicated, this is constrained by the range of direct branches on the
503 host cpu, as used by the TCG implementation of goto_tb. */
504 #if defined(__x86_64__)
505 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
506 #elif defined(__sparc__)
507 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
508 #elif defined(__arm__)
509 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
510 #elif defined(__s390x__)
511 /* We have a +- 4GB range on the branches; leave some slop. */
512 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
513 #else
514 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
515 #endif
516
517 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
518
519 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
520 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
521 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
522
523 static inline size_t size_code_gen_buffer(size_t tb_size)
524 {
525 /* Size the buffer. */
526 if (tb_size == 0) {
527 #ifdef USE_STATIC_CODE_GEN_BUFFER
528 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
529 #else
530 /* ??? Needs adjustments. */
531 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
532 static buffer, we could size this on RESERVED_VA, on the text
533 segment size of the executable, or continue to use the default. */
534 tb_size = (unsigned long)(ram_size / 4);
535 #endif
536 }
537 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
538 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
539 }
540 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
541 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
542 }
543 code_gen_buffer_size = tb_size;
544 return tb_size;
545 }
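/* For instance, an x86_64 softmmu build with 1 GiB of guest RAM and
   tb_size == 0 ends up with ram_size / 4 = 256 MiB, well inside the
   2 GiB MAX_CODE_GEN_BUFFER_SIZE imposed by the direct-branch range;
   a CONFIG_USER_ONLY build simply uses the 32 MiB static buffer. */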
546
547 #ifdef USE_STATIC_CODE_GEN_BUFFER
548 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
549 __attribute__((aligned(CODE_GEN_ALIGN)));
550
551 static inline void *alloc_code_gen_buffer(void)
552 {
553 map_exec(static_code_gen_buffer, code_gen_buffer_size);
554 return static_code_gen_buffer;
555 }
556 #elif defined(USE_MMAP)
557 static inline void *alloc_code_gen_buffer(void)
558 {
559 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
560 uintptr_t start = 0;
561 void *buf;
562
563 /* Constrain the position of the buffer based on the host cpu.
564 Note that these addresses are chosen in concert with the
565 addresses assigned in the relevant linker script file. */
566 # if defined(__PIE__) || defined(__PIC__)
567 /* Don't bother setting a preferred location if we're building
568 a position-independent executable. We're more likely to get
569 an address near the main executable if we let the kernel
570 choose the address. */
571 # elif defined(__x86_64__) && defined(MAP_32BIT)
572 /* Force the memory down into low memory with the executable.
573 Leave the choice of exact location with the kernel. */
574 flags |= MAP_32BIT;
575 /* Cannot expect to map more than 800MB in low memory. */
576 if (code_gen_buffer_size > 800u * 1024 * 1024) {
577 code_gen_buffer_size = 800u * 1024 * 1024;
578 }
579 # elif defined(__sparc__)
580 start = 0x40000000ul;
581 # elif defined(__s390x__)
582 start = 0x90000000ul;
583 # endif
584
585 buf = mmap((void *)start, code_gen_buffer_size,
586 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
587 return buf == MAP_FAILED ? NULL : buf;
588 }
589 #else
590 static inline void *alloc_code_gen_buffer(void)
591 {
592 void *buf = g_malloc(code_gen_buffer_size);
593 if (buf) {
594 map_exec(buf, code_gen_buffer_size);
595 }
596 return buf;
597 }
598 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
599
600 static inline void code_gen_alloc(size_t tb_size)
601 {
602 code_gen_buffer_size = size_code_gen_buffer(tb_size);
603 code_gen_buffer = alloc_code_gen_buffer();
604 if (code_gen_buffer == NULL) {
605 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
606 exit(1);
607 }
608
609 /* Steal room for the prologue at the end of the buffer. This ensures
610 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
611 from TBs to the prologue are going to be in range. It also means
612 that we don't need to mark (additional) portions of the data segment
613 as executable. */
614 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
615 code_gen_buffer_size -= 1024;
616
617 code_gen_buffer_max_size = code_gen_buffer_size -
618 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
619 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
620 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
621 }
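/* Approximate layout of the region returned by alloc_code_gen_buffer():

     code_gen_buffer                                     end of mapping
     |<--- TBs (code_gen_ptr grows) --->|<- op slack ->|<- 1K prologue ->|
                                        ^ code_gen_buffer_max_size

   The 1024-byte prologue region stays within direct-branch range of
   every TB, and code_gen_buffer_max_size keeps TCG_MAX_OP_SIZE *
   OPC_BUF_SIZE bytes free so a translation in progress cannot run off
   the end of the buffer. */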
622
623 /* Must be called before using the QEMU cpus. 'tb_size' is the size
624 (in bytes) allocated to the translation buffer. Zero means default
625 size. */
626 void tcg_exec_init(unsigned long tb_size)
627 {
628 cpu_gen_init();
629 code_gen_alloc(tb_size);
630 code_gen_ptr = code_gen_buffer;
631 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
632 page_init();
633 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
634 /* There's no guest base to take into account, so go ahead and
635 initialize the prologue now. */
636 tcg_prologue_init(&tcg_ctx);
637 #endif
638 }
639
640 bool tcg_enabled(void)
641 {
642 return code_gen_buffer != NULL;
643 }
644
645 void cpu_exec_init_all(void)
646 {
647 #if !defined(CONFIG_USER_ONLY)
648 memory_map_init();
649 io_mem_init();
650 #endif
651 }
652
653 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
654
655 static int cpu_common_post_load(void *opaque, int version_id)
656 {
657 CPUArchState *env = opaque;
658
659 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
660 version_id is increased. */
661 env->interrupt_request &= ~0x01;
662 tlb_flush(env, 1);
663
664 return 0;
665 }
666
667 static const VMStateDescription vmstate_cpu_common = {
668 .name = "cpu_common",
669 .version_id = 1,
670 .minimum_version_id = 1,
671 .minimum_version_id_old = 1,
672 .post_load = cpu_common_post_load,
673 .fields = (VMStateField []) {
674 VMSTATE_UINT32(halted, CPUArchState),
675 VMSTATE_UINT32(interrupt_request, CPUArchState),
676 VMSTATE_END_OF_LIST()
677 }
678 };
679 #endif
680
681 CPUArchState *qemu_get_cpu(int cpu)
682 {
683 CPUArchState *env = first_cpu;
684
685 while (env) {
686 if (env->cpu_index == cpu)
687 break;
688 env = env->next_cpu;
689 }
690
691 return env;
692 }
693
694 void cpu_exec_init(CPUArchState *env)
695 {
696 CPUArchState **penv;
697 int cpu_index;
698
699 #if defined(CONFIG_USER_ONLY)
700 cpu_list_lock();
701 #endif
702 env->next_cpu = NULL;
703 penv = &first_cpu;
704 cpu_index = 0;
705 while (*penv != NULL) {
706 penv = &(*penv)->next_cpu;
707 cpu_index++;
708 }
709 env->cpu_index = cpu_index;
710 env->numa_node = 0;
711 QTAILQ_INIT(&env->breakpoints);
712 QTAILQ_INIT(&env->watchpoints);
713 #ifndef CONFIG_USER_ONLY
714 env->thread_id = qemu_get_thread_id();
715 #endif
716 *penv = env;
717 #if defined(CONFIG_USER_ONLY)
718 cpu_list_unlock();
719 #endif
720 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
721 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
722 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
723 cpu_save, cpu_load, env);
724 #endif
725 }
726
727 /* Allocate a new translation block. Flush the translation buffer if
728 too many translation blocks or too much generated code. */
729 static TranslationBlock *tb_alloc(target_ulong pc)
730 {
731 TranslationBlock *tb;
732
733 if (nb_tbs >= code_gen_max_blocks ||
734 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
735 return NULL;
736 tb = &tbs[nb_tbs++];
737 tb->pc = pc;
738 tb->cflags = 0;
739 return tb;
740 }
741
742 void tb_free(TranslationBlock *tb)
743 {
744 /* In practice this is mostly used for single-use temporary TBs.
745 Ignore the hard cases and just back up if this TB happens to
746 be the last one generated. */
747 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
748 code_gen_ptr = tb->tc_ptr;
749 nb_tbs--;
750 }
751 }
752
753 static inline void invalidate_page_bitmap(PageDesc *p)
754 {
755 if (p->code_bitmap) {
756 g_free(p->code_bitmap);
757 p->code_bitmap = NULL;
758 }
759 p->code_write_count = 0;
760 }
761
762 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
763
764 static void page_flush_tb_1 (int level, void **lp)
765 {
766 int i;
767
768 if (*lp == NULL) {
769 return;
770 }
771 if (level == 0) {
772 PageDesc *pd = *lp;
773 for (i = 0; i < L2_SIZE; ++i) {
774 pd[i].first_tb = NULL;
775 invalidate_page_bitmap(pd + i);
776 }
777 } else {
778 void **pp = *lp;
779 for (i = 0; i < L2_SIZE; ++i) {
780 page_flush_tb_1 (level - 1, pp + i);
781 }
782 }
783 }
784
785 static void page_flush_tb(void)
786 {
787 int i;
788 for (i = 0; i < V_L1_SIZE; i++) {
789 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
790 }
791 }
792
793 /* flush all the translation blocks */
794 /* XXX: tb_flush is currently not thread safe */
795 void tb_flush(CPUArchState *env1)
796 {
797 CPUArchState *env;
798 #if defined(DEBUG_FLUSH)
799 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
800 (unsigned long)(code_gen_ptr - code_gen_buffer),
801 nb_tbs, nb_tbs > 0 ?
802 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
803 #endif
804 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
805 cpu_abort(env1, "Internal error: code buffer overflow\n");
806
807 nb_tbs = 0;
808
809 for(env = first_cpu; env != NULL; env = env->next_cpu) {
810 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
811 }
812
813 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
814 page_flush_tb();
815
816 code_gen_ptr = code_gen_buffer;
817 /* XXX: flush processor icache at this point if cache flush is
818 expensive */
819 tb_flush_count++;
820 }
821
822 #ifdef DEBUG_TB_CHECK
823
824 static void tb_invalidate_check(target_ulong address)
825 {
826 TranslationBlock *tb;
827 int i;
828 address &= TARGET_PAGE_MASK;
829 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
830 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
831 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
832 address >= tb->pc + tb->size)) {
833 printf("ERROR invalidate: address=" TARGET_FMT_lx
834 " PC=%08lx size=%04x\n",
835 address, (long)tb->pc, tb->size);
836 }
837 }
838 }
839 }
840
841 /* verify that all the pages have correct rights for code */
842 static void tb_page_check(void)
843 {
844 TranslationBlock *tb;
845 int i, flags1, flags2;
846
847 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
848 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
849 flags1 = page_get_flags(tb->pc);
850 flags2 = page_get_flags(tb->pc + tb->size - 1);
851 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
852 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
853 (long)tb->pc, tb->size, flags1, flags2);
854 }
855 }
856 }
857 }
858
859 #endif
860
861 /* invalidate one TB */
862 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
863 int next_offset)
864 {
865 TranslationBlock *tb1;
866 for(;;) {
867 tb1 = *ptb;
868 if (tb1 == tb) {
869 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
870 break;
871 }
872 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
873 }
874 }
875
876 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
877 {
878 TranslationBlock *tb1;
879 unsigned int n1;
880
881 for(;;) {
882 tb1 = *ptb;
883 n1 = (uintptr_t)tb1 & 3;
884 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
885 if (tb1 == tb) {
886 *ptb = tb1->page_next[n1];
887 break;
888 }
889 ptb = &tb1->page_next[n1];
890 }
891 }
892
893 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
894 {
895 TranslationBlock *tb1, **ptb;
896 unsigned int n1;
897
898 ptb = &tb->jmp_next[n];
899 tb1 = *ptb;
900 if (tb1) {
901 /* find tb(n) in circular list */
902 for(;;) {
903 tb1 = *ptb;
904 n1 = (uintptr_t)tb1 & 3;
905 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
906 if (n1 == n && tb1 == tb)
907 break;
908 if (n1 == 2) {
909 ptb = &tb1->jmp_first;
910 } else {
911 ptb = &tb1->jmp_next[n1];
912 }
913 }
914 /* now we can remove tb(n) from the list */
915 *ptb = tb->jmp_next[n];
916
917 tb->jmp_next[n] = NULL;
918 }
919 }
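/* The lists walked above keep a 2-bit tag in the low bits of each
   TranslationBlock pointer (TBs are sufficiently aligned for this):
   in page_next[] the tag says which of the TB's two pages the link
   belongs to, and in the jmp_first/jmp_next circular list it is the
   jump slot, with the value 2 marking the list head.  Masking with
   ~3 recovers the real pointer. */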
920
921 /* reset the jump entry 'n' of a TB so that it is not chained to
922 another TB */
923 static inline void tb_reset_jump(TranslationBlock *tb, int n)
924 {
925 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
926 }
927
928 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
929 {
930 CPUArchState *env;
931 PageDesc *p;
932 unsigned int h, n1;
933 tb_page_addr_t phys_pc;
934 TranslationBlock *tb1, *tb2;
935
936 /* remove the TB from the hash list */
937 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
938 h = tb_phys_hash_func(phys_pc);
939 tb_remove(&tb_phys_hash[h], tb,
940 offsetof(TranslationBlock, phys_hash_next));
941
942 /* remove the TB from the page list */
943 if (tb->page_addr[0] != page_addr) {
944 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
945 tb_page_remove(&p->first_tb, tb);
946 invalidate_page_bitmap(p);
947 }
948 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
949 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
950 tb_page_remove(&p->first_tb, tb);
951 invalidate_page_bitmap(p);
952 }
953
954 tb_invalidated_flag = 1;
955
956 /* remove the TB from the hash list */
957 h = tb_jmp_cache_hash_func(tb->pc);
958 for(env = first_cpu; env != NULL; env = env->next_cpu) {
959 if (env->tb_jmp_cache[h] == tb)
960 env->tb_jmp_cache[h] = NULL;
961 }
962
963 /* remove this TB from the two jump lists */
964 tb_jmp_remove(tb, 0);
965 tb_jmp_remove(tb, 1);
966
967 /* reset any remaining jumps to this TB */
968 tb1 = tb->jmp_first;
969 for(;;) {
970 n1 = (uintptr_t)tb1 & 3;
971 if (n1 == 2)
972 break;
973 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
974 tb2 = tb1->jmp_next[n1];
975 tb_reset_jump(tb1, n1);
976 tb1->jmp_next[n1] = NULL;
977 tb1 = tb2;
978 }
979 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
980
981 tb_phys_invalidate_count++;
982 }
983
984 static inline void set_bits(uint8_t *tab, int start, int len)
985 {
986 int end, mask, end1;
987
988 end = start + len;
989 tab += start >> 3;
990 mask = 0xff << (start & 7);
991 if ((start & ~7) == (end & ~7)) {
992 if (start < end) {
993 mask &= ~(0xff << (end & 7));
994 *tab |= mask;
995 }
996 } else {
997 *tab++ |= mask;
998 start = (start + 8) & ~7;
999 end1 = end & ~7;
1000 while (start < end1) {
1001 *tab++ = 0xff;
1002 start += 8;
1003 }
1004 if (start < end) {
1005 mask = ~(0xff << (end & 7));
1006 *tab |= mask;
1007 }
1008 }
1009 }
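/* e.g. set_bits(tab, 10, 4) marks bits 10..13: the byte pointer is
   advanced to tab[1] (10 >> 3), the initial mask 0xff << 2 is clipped
   with ~(0xff << 6), and tab[1] |= 0x3c. */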
1010
1011 static void build_page_bitmap(PageDesc *p)
1012 {
1013 int n, tb_start, tb_end;
1014 TranslationBlock *tb;
1015
1016 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1017
1018 tb = p->first_tb;
1019 while (tb != NULL) {
1020 n = (uintptr_t)tb & 3;
1021 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1022 /* NOTE: this is subtle as a TB may span two physical pages */
1023 if (n == 0) {
1024 /* NOTE: tb_end may be after the end of the page, but
1025 it is not a problem */
1026 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1027 tb_end = tb_start + tb->size;
1028 if (tb_end > TARGET_PAGE_SIZE)
1029 tb_end = TARGET_PAGE_SIZE;
1030 } else {
1031 tb_start = 0;
1032 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1033 }
1034 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1035 tb = tb->page_next[n];
1036 }
1037 }
1038
1039 TranslationBlock *tb_gen_code(CPUArchState *env,
1040 target_ulong pc, target_ulong cs_base,
1041 int flags, int cflags)
1042 {
1043 TranslationBlock *tb;
1044 uint8_t *tc_ptr;
1045 tb_page_addr_t phys_pc, phys_page2;
1046 target_ulong virt_page2;
1047 int code_gen_size;
1048
1049 phys_pc = get_page_addr_code(env, pc);
1050 tb = tb_alloc(pc);
1051 if (!tb) {
1052 /* flush must be done */
1053 tb_flush(env);
1054 /* cannot fail at this point */
1055 tb = tb_alloc(pc);
1056 /* Don't forget to invalidate previous TB info. */
1057 tb_invalidated_flag = 1;
1058 }
1059 tc_ptr = code_gen_ptr;
1060 tb->tc_ptr = tc_ptr;
1061 tb->cs_base = cs_base;
1062 tb->flags = flags;
1063 tb->cflags = cflags;
1064 cpu_gen_code(env, tb, &code_gen_size);
1065 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1066 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1067
1068 /* check next page if needed */
1069 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1070 phys_page2 = -1;
1071 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1072 phys_page2 = get_page_addr_code(env, virt_page2);
1073 }
1074 tb_link_page(tb, phys_pc, phys_page2);
1075 return tb;
1076 }
1077
1078 /*
1079 * Invalidate all TBs which intersect with the target physical address range
1080 * [start, end). NOTE: start and end may refer to *different* physical pages.
1081 * 'is_cpu_write_access' should be true if called from a real cpu write
1082 * access: the virtual CPU will exit the current TB if code is modified inside
1083 * this TB.
1084 */
1085 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1086 int is_cpu_write_access)
1087 {
1088 while (start < end) {
1089 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1090 start &= TARGET_PAGE_MASK;
1091 start += TARGET_PAGE_SIZE;
1092 }
1093 }
1094
1095 /*
1096 * Invalidate all TBs which intersect with the target physical address range
1097 * [start, end). NOTE: start and end must refer to the *same* physical page.
1098 * 'is_cpu_write_access' should be true if called from a real cpu write
1099 * access: the virtual CPU will exit the current TB if code is modified inside
1100 * this TB.
1101 */
1102 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1103 int is_cpu_write_access)
1104 {
1105 TranslationBlock *tb, *tb_next, *saved_tb;
1106 CPUArchState *env = cpu_single_env;
1107 tb_page_addr_t tb_start, tb_end;
1108 PageDesc *p;
1109 int n;
1110 #ifdef TARGET_HAS_PRECISE_SMC
1111 int current_tb_not_found = is_cpu_write_access;
1112 TranslationBlock *current_tb = NULL;
1113 int current_tb_modified = 0;
1114 target_ulong current_pc = 0;
1115 target_ulong current_cs_base = 0;
1116 int current_flags = 0;
1117 #endif /* TARGET_HAS_PRECISE_SMC */
1118
1119 p = page_find(start >> TARGET_PAGE_BITS);
1120 if (!p)
1121 return;
1122 if (!p->code_bitmap &&
1123 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1124 is_cpu_write_access) {
1125 /* build code bitmap */
1126 build_page_bitmap(p);
1127 }
1128
1129 /* we remove all the TBs in the range [start, end[ */
1130 /* XXX: see if in some cases it could be faster to invalidate all the code */
1131 tb = p->first_tb;
1132 while (tb != NULL) {
1133 n = (uintptr_t)tb & 3;
1134 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1135 tb_next = tb->page_next[n];
1136 /* NOTE: this is subtle as a TB may span two physical pages */
1137 if (n == 0) {
1138 /* NOTE: tb_end may be after the end of the page, but
1139 it is not a problem */
1140 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1141 tb_end = tb_start + tb->size;
1142 } else {
1143 tb_start = tb->page_addr[1];
1144 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1145 }
1146 if (!(tb_end <= start || tb_start >= end)) {
1147 #ifdef TARGET_HAS_PRECISE_SMC
1148 if (current_tb_not_found) {
1149 current_tb_not_found = 0;
1150 current_tb = NULL;
1151 if (env->mem_io_pc) {
1152 /* now we have a real cpu fault */
1153 current_tb = tb_find_pc(env->mem_io_pc);
1154 }
1155 }
1156 if (current_tb == tb &&
1157 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1158 /* If we are modifying the current TB, we must stop
1159 its execution. We could be more precise by checking
1160 that the modification is after the current PC, but it
1161 would require a specialized function to partially
1162 restore the CPU state */
1163
1164 current_tb_modified = 1;
1165 cpu_restore_state(current_tb, env, env->mem_io_pc);
1166 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1167 &current_flags);
1168 }
1169 #endif /* TARGET_HAS_PRECISE_SMC */
1170 /* we need to do that to handle the case where a signal
1171 occurs while doing tb_phys_invalidate() */
1172 saved_tb = NULL;
1173 if (env) {
1174 saved_tb = env->current_tb;
1175 env->current_tb = NULL;
1176 }
1177 tb_phys_invalidate(tb, -1);
1178 if (env) {
1179 env->current_tb = saved_tb;
1180 if (env->interrupt_request && env->current_tb)
1181 cpu_interrupt(env, env->interrupt_request);
1182 }
1183 }
1184 tb = tb_next;
1185 }
1186 #if !defined(CONFIG_USER_ONLY)
1187 /* if no code remaining, no need to continue to use slow writes */
1188 if (!p->first_tb) {
1189 invalidate_page_bitmap(p);
1190 if (is_cpu_write_access) {
1191 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1192 }
1193 }
1194 #endif
1195 #ifdef TARGET_HAS_PRECISE_SMC
1196 if (current_tb_modified) {
1197 /* we generate a block containing just the instruction
1198 modifying the memory, which ensures that the block cannot
1199 modify itself */
1200 env->current_tb = NULL;
1201 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1202 cpu_resume_from_signal(env, NULL);
1203 }
1204 #endif
1205 }
1206
1207 /* len must be <= 8 and start must be a multiple of len */
1208 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1209 {
1210 PageDesc *p;
1211 int offset, b;
1212 #if 0
1213 if (1) {
1214 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1215 cpu_single_env->mem_io_vaddr, len,
1216 cpu_single_env->eip,
1217 cpu_single_env->eip +
1218 (intptr_t)cpu_single_env->segs[R_CS].base);
1219 }
1220 #endif
1221 p = page_find(start >> TARGET_PAGE_BITS);
1222 if (!p)
1223 return;
1224 if (p->code_bitmap) {
1225 offset = start & ~TARGET_PAGE_MASK;
1226 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1227 if (b & ((1 << len) - 1))
1228 goto do_invalidate;
1229 } else {
1230 do_invalidate:
1231 tb_invalidate_phys_page_range(start, start + len, 1);
1232 }
1233 }
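/* Fast-path example: a 4-byte guest store at page offset 0x123 reads
   b = code_bitmap[0x123 >> 3] >> (0x123 & 7) and tests b & 0xf, i.e.
   bits 0x123..0x126 of the bitmap; only if translated code overlaps
   those bytes does it fall back to the full
   tb_invalidate_phys_page_range(). */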
1234
1235 #if !defined(CONFIG_SOFTMMU)
1236 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1237 uintptr_t pc, void *puc)
1238 {
1239 TranslationBlock *tb;
1240 PageDesc *p;
1241 int n;
1242 #ifdef TARGET_HAS_PRECISE_SMC
1243 TranslationBlock *current_tb = NULL;
1244 CPUArchState *env = cpu_single_env;
1245 int current_tb_modified = 0;
1246 target_ulong current_pc = 0;
1247 target_ulong current_cs_base = 0;
1248 int current_flags = 0;
1249 #endif
1250
1251 addr &= TARGET_PAGE_MASK;
1252 p = page_find(addr >> TARGET_PAGE_BITS);
1253 if (!p)
1254 return;
1255 tb = p->first_tb;
1256 #ifdef TARGET_HAS_PRECISE_SMC
1257 if (tb && pc != 0) {
1258 current_tb = tb_find_pc(pc);
1259 }
1260 #endif
1261 while (tb != NULL) {
1262 n = (uintptr_t)tb & 3;
1263 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1264 #ifdef TARGET_HAS_PRECISE_SMC
1265 if (current_tb == tb &&
1266 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1267 /* If we are modifying the current TB, we must stop
1268 its execution. We could be more precise by checking
1269 that the modification is after the current PC, but it
1270 would require a specialized function to partially
1271 restore the CPU state */
1272
1273 current_tb_modified = 1;
1274 cpu_restore_state(current_tb, env, pc);
1275 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1276 &current_flags);
1277 }
1278 #endif /* TARGET_HAS_PRECISE_SMC */
1279 tb_phys_invalidate(tb, addr);
1280 tb = tb->page_next[n];
1281 }
1282 p->first_tb = NULL;
1283 #ifdef TARGET_HAS_PRECISE_SMC
1284 if (current_tb_modified) {
1285 /* we generate a block containing just the instruction
1286 modifying the memory, which ensures that the block cannot
1287 modify itself */
1288 env->current_tb = NULL;
1289 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1290 cpu_resume_from_signal(env, puc);
1291 }
1292 #endif
1293 }
1294 #endif
1295
1296 /* add the tb in the target page and protect it if necessary */
1297 static inline void tb_alloc_page(TranslationBlock *tb,
1298 unsigned int n, tb_page_addr_t page_addr)
1299 {
1300 PageDesc *p;
1301 #ifndef CONFIG_USER_ONLY
1302 bool page_already_protected;
1303 #endif
1304
1305 tb->page_addr[n] = page_addr;
1306 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1307 tb->page_next[n] = p->first_tb;
1308 #ifndef CONFIG_USER_ONLY
1309 page_already_protected = p->first_tb != NULL;
1310 #endif
1311 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1312 invalidate_page_bitmap(p);
1313
1314 #if defined(TARGET_HAS_SMC) || 1
1315
1316 #if defined(CONFIG_USER_ONLY)
1317 if (p->flags & PAGE_WRITE) {
1318 target_ulong addr;
1319 PageDesc *p2;
1320 int prot;
1321
1322 /* force the host page as non-writable (writes will have a
1323 page fault + mprotect overhead) */
1324 page_addr &= qemu_host_page_mask;
1325 prot = 0;
1326 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1327 addr += TARGET_PAGE_SIZE) {
1328
1329 p2 = page_find (addr >> TARGET_PAGE_BITS);
1330 if (!p2)
1331 continue;
1332 prot |= p2->flags;
1333 p2->flags &= ~PAGE_WRITE;
1334 }
1335 mprotect(g2h(page_addr), qemu_host_page_size,
1336 (prot & PAGE_BITS) & ~PAGE_WRITE);
1337 #ifdef DEBUG_TB_INVALIDATE
1338 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1339 page_addr);
1340 #endif
1341 }
1342 #else
1343 /* if some code is already present, then the pages are already
1344 protected. So we handle the case where only the first TB is
1345 allocated in a physical page */
1346 if (!page_already_protected) {
1347 tlb_protect_code(page_addr);
1348 }
1349 #endif
1350
1351 #endif /* TARGET_HAS_SMC */
1352 }
1353
1354 /* add a new TB and link it to the physical page tables. phys_page2 is
1355 (-1) to indicate that only one page contains the TB. */
1356 void tb_link_page(TranslationBlock *tb,
1357 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1358 {
1359 unsigned int h;
1360 TranslationBlock **ptb;
1361
1362 /* Grab the mmap lock to stop another thread invalidating this TB
1363 before we are done. */
1364 mmap_lock();
1365 /* add in the physical hash table */
1366 h = tb_phys_hash_func(phys_pc);
1367 ptb = &tb_phys_hash[h];
1368 tb->phys_hash_next = *ptb;
1369 *ptb = tb;
1370
1371 /* add in the page list */
1372 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1373 if (phys_page2 != -1)
1374 tb_alloc_page(tb, 1, phys_page2);
1375 else
1376 tb->page_addr[1] = -1;
1377
1378 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1379 tb->jmp_next[0] = NULL;
1380 tb->jmp_next[1] = NULL;
1381
1382 /* init original jump addresses */
1383 if (tb->tb_next_offset[0] != 0xffff)
1384 tb_reset_jump(tb, 0);
1385 if (tb->tb_next_offset[1] != 0xffff)
1386 tb_reset_jump(tb, 1);
1387
1388 #ifdef DEBUG_TB_CHECK
1389 tb_page_check();
1390 #endif
1391 mmap_unlock();
1392 }
1393
1394 /* find the TB containing tc_ptr, i.e. the 'tb' in tbs[] with
1395 tb->tc_ptr <= tc_ptr < (tb + 1)->tc_ptr. Return NULL if not found */
1396 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1397 {
1398 int m_min, m_max, m;
1399 uintptr_t v;
1400 TranslationBlock *tb;
1401
1402 if (nb_tbs <= 0)
1403 return NULL;
1404 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1405 tc_ptr >= (uintptr_t)code_gen_ptr) {
1406 return NULL;
1407 }
1408 /* binary search (cf Knuth) */
1409 m_min = 0;
1410 m_max = nb_tbs - 1;
1411 while (m_min <= m_max) {
1412 m = (m_min + m_max) >> 1;
1413 tb = &tbs[m];
1414 v = (uintptr_t)tb->tc_ptr;
1415 if (v == tc_ptr)
1416 return tb;
1417 else if (tc_ptr < v) {
1418 m_max = m - 1;
1419 } else {
1420 m_min = m + 1;
1421 }
1422 }
1423 return &tbs[m_max];
1424 }
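/* The binary search above relies on tbs[] being sorted by tc_ptr:
   tb_alloc() hands out tbs[nb_tbs++] while code_gen_ptr only grows,
   so tc_ptr increases monotonically with the index until the next
   tb_flush() resets both. */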
1425
1426 static void tb_reset_jump_recursive(TranslationBlock *tb);
1427
1428 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1429 {
1430 TranslationBlock *tb1, *tb_next, **ptb;
1431 unsigned int n1;
1432
1433 tb1 = tb->jmp_next[n];
1434 if (tb1 != NULL) {
1435 /* find head of list */
1436 for(;;) {
1437 n1 = (uintptr_t)tb1 & 3;
1438 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1439 if (n1 == 2)
1440 break;
1441 tb1 = tb1->jmp_next[n1];
1442 }
1443 /* we are now sure that tb jumps to tb1 */
1444 tb_next = tb1;
1445
1446 /* remove tb from the jmp_first list */
1447 ptb = &tb_next->jmp_first;
1448 for(;;) {
1449 tb1 = *ptb;
1450 n1 = (uintptr_t)tb1 & 3;
1451 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1452 if (n1 == n && tb1 == tb)
1453 break;
1454 ptb = &tb1->jmp_next[n1];
1455 }
1456 *ptb = tb->jmp_next[n];
1457 tb->jmp_next[n] = NULL;
1458
1459 /* reset the jump to the next tb in generated code */
1460 tb_reset_jump(tb, n);
1461
1462 /* recursively reset jumps in the tb we could have jumped to */
1463 tb_reset_jump_recursive(tb_next);
1464 }
1465 }
1466
1467 static void tb_reset_jump_recursive(TranslationBlock *tb)
1468 {
1469 tb_reset_jump_recursive2(tb, 0);
1470 tb_reset_jump_recursive2(tb, 1);
1471 }
1472
1473 #if defined(TARGET_HAS_ICE)
1474 #if defined(CONFIG_USER_ONLY)
1475 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1476 {
1477 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1478 }
1479 #else
1480 void tb_invalidate_phys_addr(target_phys_addr_t addr)
1481 {
1482 ram_addr_t ram_addr;
1483 MemoryRegionSection *section;
1484
1485 section = phys_page_find(addr >> TARGET_PAGE_BITS);
1486 if (!(memory_region_is_ram(section->mr)
1487 || (section->mr->rom_device && section->mr->readable))) {
1488 return;
1489 }
1490 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1491 + memory_region_section_addr(section, addr);
1492 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1493 }
1494
1495 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1496 {
1497 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1498 (pc & ~TARGET_PAGE_MASK));
1499 }
1500 #endif
1501 #endif /* TARGET_HAS_ICE */
1502
1503 #if defined(CONFIG_USER_ONLY)
1504 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1505
1506 {
1507 }
1508
1509 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1510 int flags, CPUWatchpoint **watchpoint)
1511 {
1512 return -ENOSYS;
1513 }
1514 #else
1515 /* Add a watchpoint. */
1516 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1517 int flags, CPUWatchpoint **watchpoint)
1518 {
1519 target_ulong len_mask = ~(len - 1);
1520 CPUWatchpoint *wp;
1521
1522 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1523 if ((len & (len - 1)) || (addr & ~len_mask) ||
1524 len == 0 || len > TARGET_PAGE_SIZE) {
1525 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1526 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1527 return -EINVAL;
1528 }
1529 wp = g_malloc(sizeof(*wp));
1530
1531 wp->vaddr = addr;
1532 wp->len_mask = len_mask;
1533 wp->flags = flags;
1534
1535 /* keep all GDB-injected watchpoints in front */
1536 if (flags & BP_GDB)
1537 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1538 else
1539 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1540
1541 tlb_flush_page(env, addr);
1542
1543 if (watchpoint)
1544 *watchpoint = wp;
1545 return 0;
1546 }
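/* e.g. requesting a 4-byte watchpoint at 0x1004 gives
   len_mask = ~(target_ulong)3; the length is a power of two and
   (0x1004 & 3) == 0, so the request passes the sanity check, and the
   page is flushed from the TLB so that later accesses to it take the
   slow path where watchpoints are checked. */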
1547
1548 /* Remove a specific watchpoint. */
1549 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1550 int flags)
1551 {
1552 target_ulong len_mask = ~(len - 1);
1553 CPUWatchpoint *wp;
1554
1555 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1556 if (addr == wp->vaddr && len_mask == wp->len_mask
1557 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1558 cpu_watchpoint_remove_by_ref(env, wp);
1559 return 0;
1560 }
1561 }
1562 return -ENOENT;
1563 }
1564
1565 /* Remove a specific watchpoint by reference. */
1566 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1567 {
1568 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1569
1570 tlb_flush_page(env, watchpoint->vaddr);
1571
1572 g_free(watchpoint);
1573 }
1574
1575 /* Remove all matching watchpoints. */
1576 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1577 {
1578 CPUWatchpoint *wp, *next;
1579
1580 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1581 if (wp->flags & mask)
1582 cpu_watchpoint_remove_by_ref(env, wp);
1583 }
1584 }
1585 #endif
1586
1587 /* Add a breakpoint. */
1588 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1589 CPUBreakpoint **breakpoint)
1590 {
1591 #if defined(TARGET_HAS_ICE)
1592 CPUBreakpoint *bp;
1593
1594 bp = g_malloc(sizeof(*bp));
1595
1596 bp->pc = pc;
1597 bp->flags = flags;
1598
1599 /* keep all GDB-injected breakpoints in front */
1600 if (flags & BP_GDB)
1601 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1602 else
1603 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1604
1605 breakpoint_invalidate(env, pc);
1606
1607 if (breakpoint)
1608 *breakpoint = bp;
1609 return 0;
1610 #else
1611 return -ENOSYS;
1612 #endif
1613 }
1614
1615 /* Remove a specific breakpoint. */
1616 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1617 {
1618 #if defined(TARGET_HAS_ICE)
1619 CPUBreakpoint *bp;
1620
1621 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1622 if (bp->pc == pc && bp->flags == flags) {
1623 cpu_breakpoint_remove_by_ref(env, bp);
1624 return 0;
1625 }
1626 }
1627 return -ENOENT;
1628 #else
1629 return -ENOSYS;
1630 #endif
1631 }
1632
1633 /* Remove a specific breakpoint by reference. */
1634 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1635 {
1636 #if defined(TARGET_HAS_ICE)
1637 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1638
1639 breakpoint_invalidate(env, breakpoint->pc);
1640
1641 g_free(breakpoint);
1642 #endif
1643 }
1644
1645 /* Remove all matching breakpoints. */
1646 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1647 {
1648 #if defined(TARGET_HAS_ICE)
1649 CPUBreakpoint *bp, *next;
1650
1651 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1652 if (bp->flags & mask)
1653 cpu_breakpoint_remove_by_ref(env, bp);
1654 }
1655 #endif
1656 }
1657
1658 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1659 CPU loop after each instruction */
1660 void cpu_single_step(CPUArchState *env, int enabled)
1661 {
1662 #if defined(TARGET_HAS_ICE)
1663 if (env->singlestep_enabled != enabled) {
1664 env->singlestep_enabled = enabled;
1665 if (kvm_enabled())
1666 kvm_update_guest_debug(env, 0);
1667 else {
1668 /* must flush all the translated code to avoid inconsistencies */
1669 /* XXX: only flush what is necessary */
1670 tb_flush(env);
1671 }
1672 }
1673 #endif
1674 }
1675
1676 static void cpu_unlink_tb(CPUArchState *env)
1677 {
1678 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1679 problem and hope the cpu will stop of its own accord. For userspace
1680 emulation this isn't as bad as it sounds, since signals are mostly
1681 used to interrupt blocking syscalls. */
1682 TranslationBlock *tb;
1683 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1684
1685 spin_lock(&interrupt_lock);
1686 tb = env->current_tb;
1687 /* if the cpu is currently executing code, we must unlink it and
1688 all the potentially executing TB */
1689 if (tb) {
1690 env->current_tb = NULL;
1691 tb_reset_jump_recursive(tb);
1692 }
1693 spin_unlock(&interrupt_lock);
1694 }
1695
1696 #ifndef CONFIG_USER_ONLY
1697 /* mask must never be zero, except for A20 change call */
1698 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1699 {
1700 int old_mask;
1701
1702 old_mask = env->interrupt_request;
1703 env->interrupt_request |= mask;
1704
1705 /*
1706 * If called from iothread context, wake the target cpu in
1707 * case it's halted.
1708 */
1709 if (!qemu_cpu_is_self(env)) {
1710 qemu_cpu_kick(env);
1711 return;
1712 }
1713
1714 if (use_icount) {
1715 env->icount_decr.u16.high = 0xffff;
1716 if (!can_do_io(env)
1717 && (mask & ~old_mask) != 0) {
1718 cpu_abort(env, "Raised interrupt while not in I/O function");
1719 }
1720 } else {
1721 cpu_unlink_tb(env);
1722 }
1723 }
1724
1725 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1726
1727 #else /* CONFIG_USER_ONLY */
1728
1729 void cpu_interrupt(CPUArchState *env, int mask)
1730 {
1731 env->interrupt_request |= mask;
1732 cpu_unlink_tb(env);
1733 }
1734 #endif /* CONFIG_USER_ONLY */
1735
1736 void cpu_reset_interrupt(CPUArchState *env, int mask)
1737 {
1738 env->interrupt_request &= ~mask;
1739 }
1740
1741 void cpu_exit(CPUArchState *env)
1742 {
1743 env->exit_request = 1;
1744 cpu_unlink_tb(env);
1745 }
1746
1747 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1748 {
1749 va_list ap;
1750 va_list ap2;
1751
1752 va_start(ap, fmt);
1753 va_copy(ap2, ap);
1754 fprintf(stderr, "qemu: fatal: ");
1755 vfprintf(stderr, fmt, ap);
1756 fprintf(stderr, "\n");
1757 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1758 if (qemu_log_enabled()) {
1759 qemu_log("qemu: fatal: ");
1760 qemu_log_vprintf(fmt, ap2);
1761 qemu_log("\n");
1762 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1763 qemu_log_flush();
1764 qemu_log_close();
1765 }
1766 va_end(ap2);
1767 va_end(ap);
1768 #if defined(CONFIG_USER_ONLY)
1769 {
1770 struct sigaction act;
1771 sigfillset(&act.sa_mask);
1772 act.sa_handler = SIG_DFL;
1773 sigaction(SIGABRT, &act, NULL);
1774 }
1775 #endif
1776 abort();
1777 }
1778
1779 CPUArchState *cpu_copy(CPUArchState *env)
1780 {
1781 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1782 CPUArchState *next_cpu = new_env->next_cpu;
1783 int cpu_index = new_env->cpu_index;
1784 #if defined(TARGET_HAS_ICE)
1785 CPUBreakpoint *bp;
1786 CPUWatchpoint *wp;
1787 #endif
1788
1789 memcpy(new_env, env, sizeof(CPUArchState));
1790
1791 /* Preserve chaining and index. */
1792 new_env->next_cpu = next_cpu;
1793 new_env->cpu_index = cpu_index;
1794
1795 /* Clone all break/watchpoints.
1796 Note: Once we support ptrace with hw-debug register access, make sure
1797 BP_CPU break/watchpoints are handled correctly on clone. */
1798 QTAILQ_INIT(&new_env->breakpoints);
1799 QTAILQ_INIT(&new_env->watchpoints);
1800 #if defined(TARGET_HAS_ICE)
1801 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1802 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1803 }
1804 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1805 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1806 wp->flags, NULL);
1807 }
1808 #endif
1809
1810 return new_env;
1811 }
1812
1813 #if !defined(CONFIG_USER_ONLY)
1814 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1815 {
1816 unsigned int i;
1817
1818 /* Discard jump cache entries for any tb which might potentially
1819 overlap the flushed page. */
1820 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1821 memset (&env->tb_jmp_cache[i], 0,
1822 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1823
1824 i = tb_jmp_cache_hash_page(addr);
1825 memset (&env->tb_jmp_cache[i], 0,
1826 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1827 }
1828
1829 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1830 uintptr_t length)
1831 {
1832 uintptr_t start1;
1833
1834 /* we modify the TLB cache so that the dirty bit will be set again
1835 when accessing the range */
1836 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1837 /* Check that we don't span multiple blocks - this breaks the
1838 address comparisons below. */
1839 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1840 != (end - 1) - start) {
1841 abort();
1842 }
1843 cpu_tlb_reset_dirty_all(start1, length);
1844
1845 }
1846
1847 /* Note: start and end must be within the same ram block. */
1848 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1849 int dirty_flags)
1850 {
1851 uintptr_t length;
1852
1853 start &= TARGET_PAGE_MASK;
1854 end = TARGET_PAGE_ALIGN(end);
1855
1856 length = end - start;
1857 if (length == 0)
1858 return;
1859 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1860
1861 if (tcg_enabled()) {
1862 tlb_reset_dirty_range_all(start, end, length);
1863 }
1864 }
1865
1866 int cpu_physical_memory_set_dirty_tracking(int enable)
1867 {
1868 int ret = 0;
1869 in_migration = enable;
1870 return ret;
1871 }
1872
1873 target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
1874 MemoryRegionSection *section,
1875 target_ulong vaddr,
1876 target_phys_addr_t paddr,
1877 int prot,
1878 target_ulong *address)
1879 {
1880 target_phys_addr_t iotlb;
1881 CPUWatchpoint *wp;
1882
1883 if (memory_region_is_ram(section->mr)) {
1884 /* Normal RAM. */
1885 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1886 + memory_region_section_addr(section, paddr);
1887 if (!section->readonly) {
1888 iotlb |= phys_section_notdirty;
1889 } else {
1890 iotlb |= phys_section_rom;
1891 }
1892 } else {
1893 /* IO handlers are currently passed a physical address.
1894 It would be nice to pass an offset from the base address
1895 of that region. This would avoid having to special case RAM,
1896 and avoid full address decoding in every device.
1897 We can't use the high bits of pd for this because
1898 IO_MEM_ROMD uses these as a ram address. */
1899 iotlb = section - phys_sections;
1900 iotlb += memory_region_section_addr(section, paddr);
1901 }
1902
1903 /* Make accesses to pages with watchpoints go via the
1904 watchpoint trap routines. */
1905 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1906 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1907 /* Avoid trapping reads of pages with a write breakpoint. */
1908 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1909 iotlb = phys_section_watch + paddr;
1910 *address |= TLB_MMIO;
1911 break;
1912 }
1913 }
1914 }
1915
1916 return iotlb;
1917 }
1918
1919 #else
1920 /*
1921 * Walks guest process memory "regions" one by one
1922 * and calls callback function 'fn' for each region.
1923 */
1924
1925 struct walk_memory_regions_data
1926 {
1927 walk_memory_regions_fn fn;
1928 void *priv;
1929 uintptr_t start;
1930 int prot;
1931 };
1932
1933 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1934 abi_ulong end, int new_prot)
1935 {
1936 if (data->start != -1ul) {
1937 int rc = data->fn(data->priv, data->start, end, data->prot);
1938 if (rc != 0) {
1939 return rc;
1940 }
1941 }
1942
1943 data->start = (new_prot ? end : -1ul);
1944 data->prot = new_prot;
1945
1946 return 0;
1947 }
1948
1949 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1950 abi_ulong base, int level, void **lp)
1951 {
1952 abi_ulong pa;
1953 int i, rc;
1954
1955 if (*lp == NULL) {
1956 return walk_memory_regions_end(data, base, 0);
1957 }
1958
1959 if (level == 0) {
1960 PageDesc *pd = *lp;
1961 for (i = 0; i < L2_SIZE; ++i) {
1962 int prot = pd[i].flags;
1963
1964 pa = base | (i << TARGET_PAGE_BITS);
1965 if (prot != data->prot) {
1966 rc = walk_memory_regions_end(data, pa, prot);
1967 if (rc != 0) {
1968 return rc;
1969 }
1970 }
1971 }
1972 } else {
1973 void **pp = *lp;
1974 for (i = 0; i < L2_SIZE; ++i) {
1975 pa = base | ((abi_ulong)i <<
1976 (TARGET_PAGE_BITS + L2_BITS * level));
1977 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1978 if (rc != 0) {
1979 return rc;
1980 }
1981 }
1982 }
1983
1984 return 0;
1985 }
1986
1987 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1988 {
1989 struct walk_memory_regions_data data;
1990 uintptr_t i;
1991
1992 data.fn = fn;
1993 data.priv = priv;
1994 data.start = -1ul;
1995 data.prot = 0;
1996
1997 for (i = 0; i < V_L1_SIZE; i++) {
1998 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1999 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2000 if (rc != 0) {
2001 return rc;
2002 }
2003 }
2004
2005 return walk_memory_regions_end(&data, 0, 0);
2006 }
2007
2008 static int dump_region(void *priv, abi_ulong start,
2009 abi_ulong end, unsigned long prot)
2010 {
2011 FILE *f = (FILE *)priv;
2012
2013 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2014 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2015 start, end, end - start,
2016 ((prot & PAGE_READ) ? 'r' : '-'),
2017 ((prot & PAGE_WRITE) ? 'w' : '-'),
2018 ((prot & PAGE_EXEC) ? 'x' : '-'));
2019
2020 return (0);
2021 }
2022
2023 /* dump memory mappings */
2024 void page_dump(FILE *f)
2025 {
2026 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2027 "start", "end", "size", "prot");
2028 walk_memory_regions(f, dump_region);
2029 }
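
/* Editor's illustrative sketch (not part of the original file): besides
 * dump_region() above, any function matching walk_memory_regions_fn can be
 * plugged into walk_memory_regions().  The callback and wrapper below are
 * hypothetical examples that count the bytes mapped executable. */
#if 0   /* example only -- not compiled */
static int count_exec_region(void *priv, abi_ulong start,
                             abi_ulong end, unsigned long prot)
{
    abi_ulong *total = priv;

    if (prot & PAGE_EXEC) {
        *total += end - start;
    }
    return 0;               /* a non-zero return would stop the walk early */
}

static abi_ulong count_exec_bytes(void)
{
    abi_ulong total = 0;

    walk_memory_regions(&total, count_exec_region);
    return total;
}
#endif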
2030
2031 int page_get_flags(target_ulong address)
2032 {
2033 PageDesc *p;
2034
2035 p = page_find(address >> TARGET_PAGE_BITS);
2036 if (!p)
2037 return 0;
2038 return p->flags;
2039 }
2040
2041 /* Modify the flags of a page and invalidate the code if necessary.
2042 The flag PAGE_WRITE_ORG is positioned automatically depending
2043 on PAGE_WRITE. The mmap_lock should already be held. */
2044 void page_set_flags(target_ulong start, target_ulong end, int flags)
2045 {
2046 target_ulong addr, len;
2047
2048 /* This function should never be called with addresses outside the
2049 guest address space. If this assert fires, it probably indicates
2050 a missing call to h2g_valid. */
2051 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2052 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2053 #endif
2054 assert(start < end);
2055
2056 start = start & TARGET_PAGE_MASK;
2057 end = TARGET_PAGE_ALIGN(end);
2058
2059 if (flags & PAGE_WRITE) {
2060 flags |= PAGE_WRITE_ORG;
2061 }
2062
2063 for (addr = start, len = end - start;
2064 len != 0;
2065 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2066 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2067
2068 /* If the page was write-protected (it contains translated code)
2069 and is now being made writable, invalidate that code. */
2070 if (!(p->flags & PAGE_WRITE) &&
2071 (flags & PAGE_WRITE) &&
2072 p->first_tb) {
2073 tb_invalidate_phys_page(addr, 0, NULL);
2074 }
2075 p->flags = flags;
2076 }
2077 }
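
/* Editor's illustrative sketch (not part of the original file): a typical
 * caller is the user-mode mmap emulation, which marks a freshly mapped region
 * valid with the requested protection.  The PAGE_* flags are real; the
 * surrounding function is a hypothetical stand-in for the real mmap code. */
#if 0   /* example only -- not compiled */
static void example_after_target_mmap(abi_ulong start, abi_ulong len,
                                      int target_prot)
{
    int flags = PAGE_VALID;

    if (target_prot & PROT_READ)  flags |= PAGE_READ;
    if (target_prot & PROT_WRITE) flags |= PAGE_WRITE;  /* PAGE_WRITE_ORG is added automatically */
    if (target_prot & PROT_EXEC)  flags |= PAGE_EXEC;

    mmap_lock();
    page_set_flags(start, start + len, flags);
    mmap_unlock();
}
#endif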
2078
2079 int page_check_range(target_ulong start, target_ulong len, int flags)
2080 {
2081 PageDesc *p;
2082 target_ulong end;
2083 target_ulong addr;
2084
2085 /* This function should never be called with addresses outside the
2086 guest address space. If this assert fires, it probably indicates
2087 a missing call to h2g_valid. */
2088 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2089 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2090 #endif
2091
2092 if (len == 0) {
2093 return 0;
2094 }
2095 if (start + len - 1 < start) {
2096 /* We've wrapped around. */
2097 return -1;
2098 }
2099
2100 end = TARGET_PAGE_ALIGN(start + len); /* must be done before we lose bits in the next step */
2101 start = start & TARGET_PAGE_MASK;
2102
2103 for (addr = start, len = end - start;
2104 len != 0;
2105 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2106 p = page_find(addr >> TARGET_PAGE_BITS);
2107 if (!p)
2108 return -1;
2109 if (!(p->flags & PAGE_VALID))
2110 return -1;
2111
2112 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2113 return -1;
2114 if (flags & PAGE_WRITE) {
2115 if (!(p->flags & PAGE_WRITE_ORG))
2116 return -1;
2117 /* unprotect the page if it was put read-only because it
2118 contains translated code */
2119 if (!(p->flags & PAGE_WRITE)) {
2120 if (!page_unprotect(addr, 0, NULL))
2121 return -1;
2122 }
2123 return 0;
2124 }
2125 }
2126 return 0;
2127 }
2128
2129 /* called from signal handler: invalidate the code and unprotect the
2130 page. Return TRUE if the fault was successfully handled. */
2131 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2132 {
2133 unsigned int prot;
2134 PageDesc *p;
2135 target_ulong host_start, host_end, addr;
2136
2137 /* Technically this isn't safe inside a signal handler. However, we
2138 know this only ever happens in a synchronous SEGV handler, so in
2139 practice it seems to be ok. */
2140 mmap_lock();
2141
2142 p = page_find(address >> TARGET_PAGE_BITS);
2143 if (!p) {
2144 mmap_unlock();
2145 return 0;
2146 }
2147
2148 /* if the page was really writable, then we change its
2149 protection back to writable */
2150 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2151 host_start = address & qemu_host_page_mask;
2152 host_end = host_start + qemu_host_page_size;
2153
2154 prot = 0;
2155 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2156 p = page_find(addr >> TARGET_PAGE_BITS);
2157 p->flags |= PAGE_WRITE;
2158 prot |= p->flags;
2159
2160 /* and since the content will be modified, we must invalidate
2161 the corresponding translated code. */
2162 tb_invalidate_phys_page(addr, pc, puc);
2163 #ifdef DEBUG_TB_CHECK
2164 tb_invalidate_check(addr);
2165 #endif
2166 }
2167 mprotect((void *)g2h(host_start), qemu_host_page_size,
2168 prot & PAGE_BITS);
2169
2170 mmap_unlock();
2171 return 1;
2172 }
2173 mmap_unlock();
2174 return 0;
2175 }
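
/* Editor's illustrative sketch (not part of the original file): roughly how
 * the user-mode SEGV path consumes page_unprotect() -- if the fault hit a page
 * that was write-protected to guard translated code, unprotect it and let the
 * access be restarted.  The handler below is a simplified, hypothetical
 * stand-in for the real signal code. */
#if 0   /* example only -- not compiled */
static int example_handle_write_fault(uintptr_t host_addr, uintptr_t pc,
                                      void *puc)
{
    if (h2g_valid(host_addr) && page_unprotect(h2g(host_addr), pc, puc)) {
        return 1;   /* fault handled: the guest write can be restarted */
    }
    return 0;       /* genuine fault: deliver SIGSEGV to the guest */
}
#endif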
2176 #endif /* defined(CONFIG_USER_ONLY) */
2177
2178 #if !defined(CONFIG_USER_ONLY)
2179
2180 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2181 typedef struct subpage_t {
2182 MemoryRegion iomem;
2183 target_phys_addr_t base;
2184 uint16_t sub_section[TARGET_PAGE_SIZE];
2185 } subpage_t;
2186
2187 static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
2188 uint16_t section);
2189 static subpage_t *subpage_init(target_phys_addr_t base);
2190 static void destroy_page_desc(uint16_t section_index)
2191 {
2192 MemoryRegionSection *section = &phys_sections[section_index];
2193 MemoryRegion *mr = section->mr;
2194
2195 if (mr->subpage) {
2196 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2197 memory_region_destroy(&subpage->iomem);
2198 g_free(subpage);
2199 }
2200 }
2201
2202 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2203 {
2204 unsigned i;
2205 PhysPageEntry *p;
2206
2207 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2208 return;
2209 }
2210
2211 p = phys_map_nodes[lp->ptr];
2212 for (i = 0; i < L2_SIZE; ++i) {
2213 if (!p[i].is_leaf) {
2214 destroy_l2_mapping(&p[i], level - 1);
2215 } else {
2216 destroy_page_desc(p[i].ptr);
2217 }
2218 }
2219 lp->is_leaf = 0;
2220 lp->ptr = PHYS_MAP_NODE_NIL;
2221 }
2222
2223 static void destroy_all_mappings(void)
2224 {
2225 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2226 phys_map_nodes_reset();
2227 }
2228
2229 static uint16_t phys_section_add(MemoryRegionSection *section)
2230 {
2231 if (phys_sections_nb == phys_sections_nb_alloc) {
2232 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2233 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2234 phys_sections_nb_alloc);
2235 }
2236 phys_sections[phys_sections_nb] = *section;
2237 return phys_sections_nb++;
2238 }
2239
2240 static void phys_sections_clear(void)
2241 {
2242 phys_sections_nb = 0;
2243 }
2244
2245 static void register_subpage(MemoryRegionSection *section)
2246 {
2247 subpage_t *subpage;
2248 target_phys_addr_t base = section->offset_within_address_space
2249 & TARGET_PAGE_MASK;
2250 MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2251 MemoryRegionSection subsection = {
2252 .offset_within_address_space = base,
2253 .size = TARGET_PAGE_SIZE,
2254 };
2255 target_phys_addr_t start, end;
2256
2257 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2258
2259 if (!(existing->mr->subpage)) {
2260 subpage = subpage_init(base);
2261 subsection.mr = &subpage->iomem;
2262 phys_page_set(base >> TARGET_PAGE_BITS, 1,
2263 phys_section_add(&subsection));
2264 } else {
2265 subpage = container_of(existing->mr, subpage_t, iomem);
2266 }
2267 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2268 end = start + section->size - 1;
2269 subpage_register(subpage, start, end, phys_section_add(section));
2270 }
2271
2272
2273 static void register_multipage(MemoryRegionSection *section)
2274 {
2275 target_phys_addr_t start_addr = section->offset_within_address_space;
2276 ram_addr_t size = section->size;
2277 target_phys_addr_t addr;
2278 uint16_t section_index = phys_section_add(section);
2279
2280 assert(size);
2281
2282 addr = start_addr;
2283 phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2284 section_index);
2285 }
2286
2287 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2288 bool readonly)
2289 {
2290 MemoryRegionSection now = *section, remain = *section;
2291
2292 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2293 || (now.size < TARGET_PAGE_SIZE)) {
2294 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2295 - now.offset_within_address_space,
2296 now.size);
2297 register_subpage(&now);
2298 remain.size -= now.size;
2299 remain.offset_within_address_space += now.size;
2300 remain.offset_within_region += now.size;
2301 }
2302 while (remain.size >= TARGET_PAGE_SIZE) {
2303 now = remain;
2304 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2305 now.size = TARGET_PAGE_SIZE;
2306 register_subpage(&now);
2307 } else {
2308 now.size &= TARGET_PAGE_MASK;
2309 register_multipage(&now);
2310 }
2311 remain.size -= now.size;
2312 remain.offset_within_address_space += now.size;
2313 remain.offset_within_region += now.size;
2314 }
2315 now = remain;
2316 if (now.size) {
2317 register_subpage(&now);
2318 }
2319 }
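
/* Editor's worked example (not part of the original file), assuming 4 KiB
 * target pages: a section covering guest-physical 0x1800..0x4400 is split by
 * the loop above into
 *   - a subpage registration for 0x1800..0x2000 (unaligned head),
 *   - one multipage registration for 0x2000..0x4000 (whole pages, assuming
 *     offset_within_region is page-aligned at that point; otherwise each
 *     middle page is registered as a subpage instead),
 *   - a subpage registration for 0x4000..0x4400 (unaligned tail).
 * The head and tail share their page's subpage_t with whatever else maps
 * into the same page. */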
2320
2321
2322 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2323 {
2324 if (kvm_enabled())
2325 kvm_coalesce_mmio_region(addr, size);
2326 }
2327
2328 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2329 {
2330 if (kvm_enabled())
2331 kvm_uncoalesce_mmio_region(addr, size);
2332 }
2333
2334 void qemu_flush_coalesced_mmio_buffer(void)
2335 {
2336 if (kvm_enabled())
2337 kvm_flush_coalesced_mmio_buffer();
2338 }
2339
2340 #if defined(__linux__) && !defined(TARGET_S390X)
2341
2342 #include <sys/vfs.h>
2343
2344 #define HUGETLBFS_MAGIC 0x958458f6
2345
2346 static long gethugepagesize(const char *path)
2347 {
2348 struct statfs fs;
2349 int ret;
2350
2351 do {
2352 ret = statfs(path, &fs);
2353 } while (ret != 0 && errno == EINTR);
2354
2355 if (ret != 0) {
2356 perror(path);
2357 return 0;
2358 }
2359
2360 if (fs.f_type != HUGETLBFS_MAGIC)
2361 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2362
2363 return fs.f_bsize;
2364 }
2365
2366 static void *file_ram_alloc(RAMBlock *block,
2367 ram_addr_t memory,
2368 const char *path)
2369 {
2370 char *filename;
2371 void *area;
2372 int fd;
2373 #ifdef MAP_POPULATE
2374 int flags;
2375 #endif
2376 unsigned long hpagesize;
2377
2378 hpagesize = gethugepagesize(path);
2379 if (!hpagesize) {
2380 return NULL;
2381 }
2382
2383 if (memory < hpagesize) {
2384 return NULL;
2385 }
2386
2387 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2388 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2389 return NULL;
2390 }
2391
2392 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2393 return NULL;
2394 }
2395
2396 fd = mkstemp(filename);
2397 if (fd < 0) {
2398 perror("unable to create backing store for hugepages");
2399 free(filename);
2400 return NULL;
2401 }
2402 unlink(filename);
2403 free(filename);
2404
2405 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2406
2407 /*
2408 * ftruncate is not supported by hugetlbfs in older
2409 * hosts, so don't bother bailing out on errors.
2410 * If anything goes wrong with it under other filesystems,
2411 * mmap will fail.
2412 */
2413 if (ftruncate(fd, memory))
2414 perror("ftruncate");
2415
2416 #ifdef MAP_POPULATE
2417 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2418 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2419 * to sidestep this quirk.
2420 */
2421 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2422 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2423 #else
2424 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2425 #endif
2426 if (area == MAP_FAILED) {
2427 perror("file_ram_alloc: can't mmap RAM pages");
2428 close(fd);
2429 return (NULL);
2430 }
2431 block->fd = fd;
2432 return area;
2433 }
2434 #endif
2435
2436 static ram_addr_t find_ram_offset(ram_addr_t size)
2437 {
2438 RAMBlock *block, *next_block;
2439 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2440
2441 if (QLIST_EMPTY(&ram_list.blocks))
2442 return 0;
2443
2444 QLIST_FOREACH(block, &ram_list.blocks, next) {
2445 ram_addr_t end, next = RAM_ADDR_MAX;
2446
2447 end = block->offset + block->length;
2448
2449 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2450 if (next_block->offset >= end) {
2451 next = MIN(next, next_block->offset);
2452 }
2453 }
2454 if (next - end >= size && next - end < mingap) {
2455 offset = end;
2456 mingap = next - end;
2457 }
2458 }
2459
2460 if (offset == RAM_ADDR_MAX) {
2461 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2462 (uint64_t)size);
2463 abort();
2464 }
2465
2466 return offset;
2467 }
2468
2469 static ram_addr_t last_ram_offset(void)
2470 {
2471 RAMBlock *block;
2472 ram_addr_t last = 0;
2473
2474 QLIST_FOREACH(block, &ram_list.blocks, next)
2475 last = MAX(last, block->offset + block->length);
2476
2477 return last;
2478 }
2479
2480 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2481 {
2482 int ret;
2483 QemuOpts *machine_opts;
2484
2485 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
2486 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2487 if (machine_opts &&
2488 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2489 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2490 if (ret) {
2491 perror("qemu_madvise");
2492 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2493 "but dump_guest_core=off specified\n");
2494 }
2495 }
2496 }
2497
2498 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2499 {
2500 RAMBlock *new_block, *block;
2501
2502 new_block = NULL;
2503 QLIST_FOREACH(block, &ram_list.blocks, next) {
2504 if (block->offset == addr) {
2505 new_block = block;
2506 break;
2507 }
2508 }
2509 assert(new_block);
2510 assert(!new_block->idstr[0]);
2511
2512 if (dev) {
2513 char *id = qdev_get_dev_path(dev);
2514 if (id) {
2515 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2516 g_free(id);
2517 }
2518 }
2519 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2520
2521 QLIST_FOREACH(block, &ram_list.blocks, next) {
2522 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2523 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2524 new_block->idstr);
2525 abort();
2526 }
2527 }
2528 }
2529
2530 static int memory_try_enable_merging(void *addr, size_t len)
2531 {
2532 QemuOpts *opts;
2533
2534 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2535 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2536 /* disabled by the user */
2537 return 0;
2538 }
2539
2540 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2541 }
2542
2543 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2544 MemoryRegion *mr)
2545 {
2546 RAMBlock *new_block;
2547
2548 size = TARGET_PAGE_ALIGN(size);
2549 new_block = g_malloc0(sizeof(*new_block));
2550
2551 new_block->mr = mr;
2552 new_block->offset = find_ram_offset(size);
2553 if (host) {
2554 new_block->host = host;
2555 new_block->flags |= RAM_PREALLOC_MASK;
2556 } else {
2557 if (mem_path) {
2558 #if defined (__linux__) && !defined(TARGET_S390X)
2559 new_block->host = file_ram_alloc(new_block, size, mem_path);
2560 if (!new_block->host) {
2561 new_block->host = qemu_vmalloc(size);
2562 memory_try_enable_merging(new_block->host, size);
2563 }
2564 #else
2565 fprintf(stderr, "-mem-path option unsupported\n");
2566 exit(1);
2567 #endif
2568 } else {
2569 if (xen_enabled()) {
2570 xen_ram_alloc(new_block->offset, size, mr);
2571 } else if (kvm_enabled()) {
2572 /* some s390/kvm configurations have special constraints */
2573 new_block->host = kvm_vmalloc(size);
2574 } else {
2575 new_block->host = qemu_vmalloc(size);
2576 }
2577 memory_try_enable_merging(new_block->host, size);
2578 }
2579 }
2580 new_block->length = size;
2581
2582 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2583
2584 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2585 last_ram_offset() >> TARGET_PAGE_BITS);
2586 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2587 0, size >> TARGET_PAGE_BITS);
2588 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2589
2590 qemu_ram_setup_dump(new_block->host, size);
2591
2592 if (kvm_enabled())
2593 kvm_setup_guest_memory(new_block->host, size);
2594
2595 return new_block->offset;
2596 }
2597
2598 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2599 {
2600 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2601 }
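
/* Editor's illustrative sketch (not part of the original file): callers do
 * not normally invoke qemu_ram_alloc() directly; board code goes through the
 * memory API, which lands here.  The memory_region_init_ram() signature below
 * matches this era of the tree, but treat the exact calls as assumptions. */
#if 0   /* example only -- not compiled */
static void example_board_ram(MemoryRegion *system, ram_addr_t ram_size)
{
    MemoryRegion *ram = g_malloc(sizeof(*ram));

    /* memory_region_init_ram() calls qemu_ram_alloc() internally. */
    memory_region_init_ram(ram, "example.ram", ram_size);
    memory_region_add_subregion(system, 0, ram);
}
#endif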
2602
2603 void qemu_ram_free_from_ptr(ram_addr_t addr)
2604 {
2605 RAMBlock *block;
2606
2607 QLIST_FOREACH(block, &ram_list.blocks, next) {
2608 if (addr == block->offset) {
2609 QLIST_REMOVE(block, next);
2610 g_free(block);
2611 return;
2612 }
2613 }
2614 }
2615
2616 void qemu_ram_free(ram_addr_t addr)
2617 {
2618 RAMBlock *block;
2619
2620 QLIST_FOREACH(block, &ram_list.blocks, next) {
2621 if (addr == block->offset) {
2622 QLIST_REMOVE(block, next);
2623 if (block->flags & RAM_PREALLOC_MASK) {
2624 ;
2625 } else if (mem_path) {
2626 #if defined (__linux__) && !defined(TARGET_S390X)
2627 if (block->fd) {
2628 munmap(block->host, block->length);
2629 close(block->fd);
2630 } else {
2631 qemu_vfree(block->host);
2632 }
2633 #else
2634 abort();
2635 #endif
2636 } else {
2637 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2638 munmap(block->host, block->length);
2639 #else
2640 if (xen_enabled()) {
2641 xen_invalidate_map_cache_entry(block->host);
2642 } else {
2643 qemu_vfree(block->host);
2644 }
2645 #endif
2646 }
2647 g_free(block);
2648 return;
2649 }
2650 }
2651
2652 }
2653
2654 #ifndef _WIN32
2655 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2656 {
2657 RAMBlock *block;
2658 ram_addr_t offset;
2659 int flags;
2660 void *area, *vaddr;
2661
2662 QLIST_FOREACH(block, &ram_list.blocks, next) {
2663 offset = addr - block->offset;
2664 if (offset < block->length) {
2665 vaddr = block->host + offset;
2666 if (block->flags & RAM_PREALLOC_MASK) {
2667 ;
2668 } else {
2669 flags = MAP_FIXED;
2670 munmap(vaddr, length);
2671 if (mem_path) {
2672 #if defined(__linux__) && !defined(TARGET_S390X)
2673 if (block->fd) {
2674 #ifdef MAP_POPULATE
2675 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2676 MAP_PRIVATE;
2677 #else
2678 flags |= MAP_PRIVATE;
2679 #endif
2680 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2681 flags, block->fd, offset);
2682 } else {
2683 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2684 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2685 flags, -1, 0);
2686 }
2687 #else
2688 abort();
2689 #endif
2690 } else {
2691 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2692 flags |= MAP_SHARED | MAP_ANONYMOUS;
2693 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2694 flags, -1, 0);
2695 #else
2696 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2697 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2698 flags, -1, 0);
2699 #endif
2700 }
2701 if (area != vaddr) {
2702 fprintf(stderr, "Could not remap addr: "
2703 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2704 length, addr);
2705 exit(1);
2706 }
2707 memory_try_enable_merging(vaddr, length);
2708 qemu_ram_setup_dump(vaddr, length);
2709 }
2710 return;
2711 }
2712 }
2713 }
2714 #endif /* !_WIN32 */
2715
2716 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2717 With the exception of the softmmu code in this file, this should
2718 only be used for local memory (e.g. video ram) that the device owns,
2719 and knows it isn't going to access beyond the end of the block.
2720
2721 It should not be used for general purpose DMA.
2722 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2723 */
2724 void *qemu_get_ram_ptr(ram_addr_t addr)
2725 {
2726 RAMBlock *block;
2727
2728 QLIST_FOREACH(block, &ram_list.blocks, next) {
2729 if (addr - block->offset < block->length) {
2730 /* Move this entry to the start of the list. */
2731 if (block != QLIST_FIRST(&ram_list.blocks)) {
2732 QLIST_REMOVE(block, next);
2733 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2734 }
2735 if (xen_enabled()) {
2736 /* We need to check whether the requested address is in RAM
2737 * because we don't want to map the entire guest memory in QEMU.
2738 * In that case just map up to the end of the page.
2739 */
2740 if (block->offset == 0) {
2741 return xen_map_cache(addr, 0, 0);
2742 } else if (block->host == NULL) {
2743 block->host =
2744 xen_map_cache(block->offset, block->length, 1);
2745 }
2746 }
2747 return block->host + (addr - block->offset);
2748 }
2749 }
2750
2751 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2752 abort();
2753
2754 return NULL;
2755 }
2756
2757 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2758 * Same as qemu_get_ram_ptr, but avoids reordering the RAM blocks.
2759 */
2760 void *qemu_safe_ram_ptr(ram_addr_t addr)
2761 {
2762 RAMBlock *block;
2763
2764 QLIST_FOREACH(block, &ram_list.blocks, next) {
2765 if (addr - block->offset < block->length) {
2766 if (xen_enabled()) {
2767 /* We need to check whether the requested address is in RAM
2768 * because we don't want to map the entire guest memory in QEMU.
2769 * In that case just map up to the end of the page.
2770 */
2771 if (block->offset == 0) {
2772 return xen_map_cache(addr, 0, 0);
2773 } else if (block->host == NULL) {
2774 block->host =
2775 xen_map_cache(block->offset, block->length, 1);
2776 }
2777 }
2778 return block->host + (addr - block->offset);
2779 }
2780 }
2781
2782 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2783 abort();
2784
2785 return NULL;
2786 }
2787
2788 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2789 * but takes a size argument */
2790 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2791 {
2792 if (*size == 0) {
2793 return NULL;
2794 }
2795 if (xen_enabled()) {
2796 return xen_map_cache(addr, *size, 1);
2797 } else {
2798 RAMBlock *block;
2799
2800 QLIST_FOREACH(block, &ram_list.blocks, next) {
2801 if (addr - block->offset < block->length) {
2802 if (addr - block->offset + *size > block->length)
2803 *size = block->length - addr + block->offset;
2804 return block->host + (addr - block->offset);
2805 }
2806 }
2807
2808 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2809 abort();
2810 }
2811 }
2812
2813 void qemu_put_ram_ptr(void *addr)
2814 {
2815 trace_qemu_put_ram_ptr(addr);
2816 }
2817
2818 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2819 {
2820 RAMBlock *block;
2821 uint8_t *host = ptr;
2822
2823 if (xen_enabled()) {
2824 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2825 return 0;
2826 }
2827
2828 QLIST_FOREACH(block, &ram_list.blocks, next) {
2829 /* This case happens when the block is not mapped. */
2830 if (block->host == NULL) {
2831 continue;
2832 }
2833 if (host - block->host < block->length) {
2834 *ram_addr = block->offset + (host - block->host);
2835 return 0;
2836 }
2837 }
2838
2839 return -1;
2840 }
2841
2842 /* Some of the softmmu routines need to translate from a host pointer
2843 (typically a TLB entry) back to a ram offset. */
2844 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2845 {
2846 ram_addr_t ram_addr;
2847
2848 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2849 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2850 abort();
2851 }
2852 return ram_addr;
2853 }
2854
2855 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
2856 unsigned size)
2857 {
2858 #ifdef DEBUG_UNASSIGNED
2859 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2860 #endif
2861 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2862 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2863 #endif
2864 return 0;
2865 }
2866
2867 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
2868 uint64_t val, unsigned size)
2869 {
2870 #ifdef DEBUG_UNASSIGNED
2871 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2872 #endif
2873 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2874 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2875 #endif
2876 }
2877
2878 static const MemoryRegionOps unassigned_mem_ops = {
2879 .read = unassigned_mem_read,
2880 .write = unassigned_mem_write,
2881 .endianness = DEVICE_NATIVE_ENDIAN,
2882 };
2883
2884 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
2885 unsigned size)
2886 {
2887 abort();
2888 }
2889
2890 static void error_mem_write(void *opaque, target_phys_addr_t addr,
2891 uint64_t value, unsigned size)
2892 {
2893 abort();
2894 }
2895
2896 static const MemoryRegionOps error_mem_ops = {
2897 .read = error_mem_read,
2898 .write = error_mem_write,
2899 .endianness = DEVICE_NATIVE_ENDIAN,
2900 };
2901
2902 static const MemoryRegionOps rom_mem_ops = {
2903 .read = error_mem_read,
2904 .write = unassigned_mem_write,
2905 .endianness = DEVICE_NATIVE_ENDIAN,
2906 };
2907
2908 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
2909 uint64_t val, unsigned size)
2910 {
2911 int dirty_flags;
2912 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2913 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2914 #if !defined(CONFIG_USER_ONLY)
2915 tb_invalidate_phys_page_fast(ram_addr, size);
2916 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2917 #endif
2918 }
2919 switch (size) {
2920 case 1:
2921 stb_p(qemu_get_ram_ptr(ram_addr), val);
2922 break;
2923 case 2:
2924 stw_p(qemu_get_ram_ptr(ram_addr), val);
2925 break;
2926 case 4:
2927 stl_p(qemu_get_ram_ptr(ram_addr), val);
2928 break;
2929 default:
2930 abort();
2931 }
2932 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2933 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2934 /* we remove the notdirty callback only if the code has been
2935 flushed */
2936 if (dirty_flags == 0xff)
2937 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2938 }
2939
2940 static const MemoryRegionOps notdirty_mem_ops = {
2941 .read = error_mem_read,
2942 .write = notdirty_mem_write,
2943 .endianness = DEVICE_NATIVE_ENDIAN,
2944 };
2945
2946 /* Generate a debug exception if a watchpoint has been hit. */
2947 static void check_watchpoint(int offset, int len_mask, int flags)
2948 {
2949 CPUArchState *env = cpu_single_env;
2950 target_ulong pc, cs_base;
2951 TranslationBlock *tb;
2952 target_ulong vaddr;
2953 CPUWatchpoint *wp;
2954 int cpu_flags;
2955
2956 if (env->watchpoint_hit) {
2957 /* We re-entered the check after replacing the TB. Now raise
2958 * the debug interrupt so that it will trigger after the
2959 * current instruction. */
2960 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2961 return;
2962 }
2963 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2964 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2965 if ((vaddr == (wp->vaddr & len_mask) ||
2966 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2967 wp->flags |= BP_WATCHPOINT_HIT;
2968 if (!env->watchpoint_hit) {
2969 env->watchpoint_hit = wp;
2970 tb = tb_find_pc(env->mem_io_pc);
2971 if (!tb) {
2972 cpu_abort(env, "check_watchpoint: could not find TB for "
2973 "pc=%p", (void *)env->mem_io_pc);
2974 }
2975 cpu_restore_state(tb, env, env->mem_io_pc);
2976 tb_phys_invalidate(tb, -1);
2977 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2978 env->exception_index = EXCP_DEBUG;
2979 cpu_loop_exit(env);
2980 } else {
2981 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2982 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2983 cpu_resume_from_signal(env, NULL);
2984 }
2985 }
2986 } else {
2987 wp->flags &= ~BP_WATCHPOINT_HIT;
2988 }
2989 }
2990 }
2991
2992 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2993 so these check for a hit then pass through to the normal out-of-line
2994 phys routines. */
2995 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
2996 unsigned size)
2997 {
2998 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2999 switch (size) {
3000 case 1: return ldub_phys(addr);
3001 case 2: return lduw_phys(addr);
3002 case 4: return ldl_phys(addr);
3003 default: abort();
3004 }
3005 }
3006
3007 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
3008 uint64_t val, unsigned size)
3009 {
3010 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3011 switch (size) {
3012 case 1:
3013 stb_phys(addr, val);
3014 break;
3015 case 2:
3016 stw_phys(addr, val);
3017 break;
3018 case 4:
3019 stl_phys(addr, val);
3020 break;
3021 default: abort();
3022 }
3023 }
3024
3025 static const MemoryRegionOps watch_mem_ops = {
3026 .read = watch_mem_read,
3027 .write = watch_mem_write,
3028 .endianness = DEVICE_NATIVE_ENDIAN,
3029 };
3030
3031 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3032 unsigned len)
3033 {
3034 subpage_t *mmio = opaque;
3035 unsigned int idx = SUBPAGE_IDX(addr);
3036 MemoryRegionSection *section;
3037 #if defined(DEBUG_SUBPAGE)
3038 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3039 mmio, len, addr, idx);
3040 #endif
3041
3042 section = &phys_sections[mmio->sub_section[idx]];
3043 addr += mmio->base;
3044 addr -= section->offset_within_address_space;
3045 addr += section->offset_within_region;
3046 return io_mem_read(section->mr, addr, len);
3047 }
3048
3049 static void subpage_write(void *opaque, target_phys_addr_t addr,
3050 uint64_t value, unsigned len)
3051 {
3052 subpage_t *mmio = opaque;
3053 unsigned int idx = SUBPAGE_IDX(addr);
3054 MemoryRegionSection *section;
3055 #if defined(DEBUG_SUBPAGE)
3056 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3057 " idx %d value %"PRIx64"\n",
3058 __func__, mmio, len, addr, idx, value);
3059 #endif
3060
3061 section = &phys_sections[mmio->sub_section[idx]];
3062 addr += mmio->base;
3063 addr -= section->offset_within_address_space;
3064 addr += section->offset_within_region;
3065 io_mem_write(section->mr, addr, value, len);
3066 }
3067
3068 static const MemoryRegionOps subpage_ops = {
3069 .read = subpage_read,
3070 .write = subpage_write,
3071 .endianness = DEVICE_NATIVE_ENDIAN,
3072 };
3073
3074 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3075 unsigned size)
3076 {
3077 ram_addr_t raddr = addr;
3078 void *ptr = qemu_get_ram_ptr(raddr);
3079 switch (size) {
3080 case 1: return ldub_p(ptr);
3081 case 2: return lduw_p(ptr);
3082 case 4: return ldl_p(ptr);
3083 default: abort();
3084 }
3085 }
3086
3087 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3088 uint64_t value, unsigned size)
3089 {
3090 ram_addr_t raddr = addr;
3091 void *ptr = qemu_get_ram_ptr(raddr);
3092 switch (size) {
3093 case 1: return stb_p(ptr, value);
3094 case 2: return stw_p(ptr, value);
3095 case 4: return stl_p(ptr, value);
3096 default: abort();
3097 }
3098 }
3099
3100 static const MemoryRegionOps subpage_ram_ops = {
3101 .read = subpage_ram_read,
3102 .write = subpage_ram_write,
3103 .endianness = DEVICE_NATIVE_ENDIAN,
3104 };
3105
3106 static int subpage_register(subpage_t *mmio, uint32_t start, uint32_t end,
3107 uint16_t section)
3108 {
3109 int idx, eidx;
3110
3111 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3112 return -1;
3113 idx = SUBPAGE_IDX(start);
3114 eidx = SUBPAGE_IDX(end);
3115 #if defined(DEBUG_SUBPAGE)
3116 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3117 mmio, start, end, idx, eidx, memory);
3118 #endif
3119 if (memory_region_is_ram(phys_sections[section].mr)) {
3120 MemoryRegionSection new_section = phys_sections[section];
3121 new_section.mr = &io_mem_subpage_ram;
3122 section = phys_section_add(&new_section);
3123 }
3124 for (; idx <= eidx; idx++) {
3125 mmio->sub_section[idx] = section;
3126 }
3127
3128 return 0;
3129 }
3130
3131 static subpage_t *subpage_init(target_phys_addr_t base)
3132 {
3133 subpage_t *mmio;
3134
3135 mmio = g_malloc0(sizeof(subpage_t));
3136
3137 mmio->base = base;
3138 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3139 "subpage", TARGET_PAGE_SIZE);
3140 mmio->iomem.subpage = true;
3141 #if defined(DEBUG_SUBPAGE)
3142 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3143 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3144 #endif
3145 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3146
3147 return mmio;
3148 }
3149
3150 static uint16_t dummy_section(MemoryRegion *mr)
3151 {
3152 MemoryRegionSection section = {
3153 .mr = mr,
3154 .offset_within_address_space = 0,
3155 .offset_within_region = 0,
3156 .size = UINT64_MAX,
3157 };
3158
3159 return phys_section_add(&section);
3160 }
3161
3162 MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3163 {
3164 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3165 }
3166
3167 static void io_mem_init(void)
3168 {
3169 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3170 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3171 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3172 "unassigned", UINT64_MAX);
3173 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3174 "notdirty", UINT64_MAX);
3175 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3176 "subpage-ram", UINT64_MAX);
3177 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3178 "watch", UINT64_MAX);
3179 }
3180
3181 static void core_begin(MemoryListener *listener)
3182 {
3183 destroy_all_mappings();
3184 phys_sections_clear();
3185 phys_map.ptr = PHYS_MAP_NODE_NIL;
3186 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3187 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3188 phys_section_rom = dummy_section(&io_mem_rom);
3189 phys_section_watch = dummy_section(&io_mem_watch);
3190 }
3191
3192 static void core_commit(MemoryListener *listener)
3193 {
3194 CPUArchState *env;
3195
3196 /* since each CPU stores ram addresses in its TLB cache, we must
3197 reset the modified entries */
3198 /* XXX: slow! */
3199 for (env = first_cpu; env != NULL; env = env->next_cpu) {
3200 tlb_flush(env, 1);
3201 }
3202 }
3203
3204 static void core_region_add(MemoryListener *listener,
3205 MemoryRegionSection *section)
3206 {
3207 cpu_register_physical_memory_log(section, section->readonly);
3208 }
3209
3210 static void core_region_del(MemoryListener *listener,
3211 MemoryRegionSection *section)
3212 {
3213 }
3214
3215 static void core_region_nop(MemoryListener *listener,
3216 MemoryRegionSection *section)
3217 {
3218 cpu_register_physical_memory_log(section, section->readonly);
3219 }
3220
3221 static void core_log_start(MemoryListener *listener,
3222 MemoryRegionSection *section)
3223 {
3224 }
3225
3226 static void core_log_stop(MemoryListener *listener,
3227 MemoryRegionSection *section)
3228 {
3229 }
3230
3231 static void core_log_sync(MemoryListener *listener,
3232 MemoryRegionSection *section)
3233 {
3234 }
3235
3236 static void core_log_global_start(MemoryListener *listener)
3237 {
3238 cpu_physical_memory_set_dirty_tracking(1);
3239 }
3240
3241 static void core_log_global_stop(MemoryListener *listener)
3242 {
3243 cpu_physical_memory_set_dirty_tracking(0);
3244 }
3245
3246 static void core_eventfd_add(MemoryListener *listener,
3247 MemoryRegionSection *section,
3248 bool match_data, uint64_t data, EventNotifier *e)
3249 {
3250 }
3251
3252 static void core_eventfd_del(MemoryListener *listener,
3253 MemoryRegionSection *section,
3254 bool match_data, uint64_t data, EventNotifier *e)
3255 {
3256 }
3257
3258 static void io_begin(MemoryListener *listener)
3259 {
3260 }
3261
3262 static void io_commit(MemoryListener *listener)
3263 {
3264 }
3265
3266 static void io_region_add(MemoryListener *listener,
3267 MemoryRegionSection *section)
3268 {
3269 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3270
3271 mrio->mr = section->mr;
3272 mrio->offset = section->offset_within_region;
3273 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3274 section->offset_within_address_space, section->size);
3275 ioport_register(&mrio->iorange);
3276 }
3277
3278 static void io_region_del(MemoryListener *listener,
3279 MemoryRegionSection *section)
3280 {
3281 isa_unassign_ioport(section->offset_within_address_space, section->size);
3282 }
3283
3284 static void io_region_nop(MemoryListener *listener,
3285 MemoryRegionSection *section)
3286 {
3287 }
3288
3289 static void io_log_start(MemoryListener *listener,
3290 MemoryRegionSection *section)
3291 {
3292 }
3293
3294 static void io_log_stop(MemoryListener *listener,
3295 MemoryRegionSection *section)
3296 {
3297 }
3298
3299 static void io_log_sync(MemoryListener *listener,
3300 MemoryRegionSection *section)
3301 {
3302 }
3303
3304 static void io_log_global_start(MemoryListener *listener)
3305 {
3306 }
3307
3308 static void io_log_global_stop(MemoryListener *listener)
3309 {
3310 }
3311
3312 static void io_eventfd_add(MemoryListener *listener,
3313 MemoryRegionSection *section,
3314 bool match_data, uint64_t data, EventNotifier *e)
3315 {
3316 }
3317
3318 static void io_eventfd_del(MemoryListener *listener,
3319 MemoryRegionSection *section,
3320 bool match_data, uint64_t data, EventNotifier *e)
3321 {
3322 }
3323
3324 static MemoryListener core_memory_listener = {
3325 .begin = core_begin,
3326 .commit = core_commit,
3327 .region_add = core_region_add,
3328 .region_del = core_region_del,
3329 .region_nop = core_region_nop,
3330 .log_start = core_log_start,
3331 .log_stop = core_log_stop,
3332 .log_sync = core_log_sync,
3333 .log_global_start = core_log_global_start,
3334 .log_global_stop = core_log_global_stop,
3335 .eventfd_add = core_eventfd_add,
3336 .eventfd_del = core_eventfd_del,
3337 .priority = 0,
3338 };
3339
3340 static MemoryListener io_memory_listener = {
3341 .begin = io_begin,
3342 .commit = io_commit,
3343 .region_add = io_region_add,
3344 .region_del = io_region_del,
3345 .region_nop = io_region_nop,
3346 .log_start = io_log_start,
3347 .log_stop = io_log_stop,
3348 .log_sync = io_log_sync,
3349 .log_global_start = io_log_global_start,
3350 .log_global_stop = io_log_global_stop,
3351 .eventfd_add = io_eventfd_add,
3352 .eventfd_del = io_eventfd_del,
3353 .priority = 0,
3354 };
3355
3356 static void memory_map_init(void)
3357 {
3358 system_memory = g_malloc(sizeof(*system_memory));
3359 memory_region_init(system_memory, "system", INT64_MAX);
3360 set_system_memory_map(system_memory);
3361
3362 system_io = g_malloc(sizeof(*system_io));
3363 memory_region_init(system_io, "io", 65536);
3364 set_system_io_map(system_io);
3365
3366 memory_listener_register(&core_memory_listener, system_memory);
3367 memory_listener_register(&io_memory_listener, system_io);
3368 }
3369
3370 MemoryRegion *get_system_memory(void)
3371 {
3372 return system_memory;
3373 }
3374
3375 MemoryRegion *get_system_io(void)
3376 {
3377 return system_io;
3378 }
3379
3380 #endif /* !defined(CONFIG_USER_ONLY) */
3381
3382 /* physical memory access (slow version, mainly for debug) */
3383 #if defined(CONFIG_USER_ONLY)
3384 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3385 uint8_t *buf, int len, int is_write)
3386 {
3387 int l, flags;
3388 target_ulong page;
3389 void * p;
3390
3391 while (len > 0) {
3392 page = addr & TARGET_PAGE_MASK;
3393 l = (page + TARGET_PAGE_SIZE) - addr;
3394 if (l > len)
3395 l = len;
3396 flags = page_get_flags(page);
3397 if (!(flags & PAGE_VALID))
3398 return -1;
3399 if (is_write) {
3400 if (!(flags & PAGE_WRITE))
3401 return -1;
3402 /* XXX: this code should not depend on lock_user */
3403 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3404 return -1;
3405 memcpy(p, buf, l);
3406 unlock_user(p, addr, l);
3407 } else {
3408 if (!(flags & PAGE_READ))
3409 return -1;
3410 /* XXX: this code should not depend on lock_user */
3411 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3412 return -1;
3413 memcpy(buf, p, l);
3414 unlock_user(p, addr, 0);
3415 }
3416 len -= l;
3417 buf += l;
3418 addr += l;
3419 }
3420 return 0;
3421 }
3422
3423 #else
3424
3425 static void invalidate_and_set_dirty(target_phys_addr_t addr,
3426 target_phys_addr_t length)
3427 {
3428 if (!cpu_physical_memory_is_dirty(addr)) {
3429 /* invalidate code */
3430 tb_invalidate_phys_page_range(addr, addr + length, 0);
3431 /* set dirty bit */
3432 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3433 }
3434 xen_modified_memory(addr, length);
3435 }
3436
3437 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3438 int len, int is_write)
3439 {
3440 int l;
3441 uint8_t *ptr;
3442 uint32_t val;
3443 target_phys_addr_t page;
3444 MemoryRegionSection *section;
3445
3446 while (len > 0) {
3447 page = addr & TARGET_PAGE_MASK;
3448 l = (page + TARGET_PAGE_SIZE) - addr;
3449 if (l > len)
3450 l = len;
3451 section = phys_page_find(page >> TARGET_PAGE_BITS);
3452
3453 if (is_write) {
3454 if (!memory_region_is_ram(section->mr)) {
3455 target_phys_addr_t addr1;
3456 addr1 = memory_region_section_addr(section, addr);
3457 /* XXX: could force cpu_single_env to NULL to avoid
3458 potential bugs */
3459 if (l >= 4 && ((addr1 & 3) == 0)) {
3460 /* 32 bit write access */
3461 val = ldl_p(buf);
3462 io_mem_write(section->mr, addr1, val, 4);
3463 l = 4;
3464 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3465 /* 16 bit write access */
3466 val = lduw_p(buf);
3467 io_mem_write(section->mr, addr1, val, 2);
3468 l = 2;
3469 } else {
3470 /* 8 bit write access */
3471 val = ldub_p(buf);
3472 io_mem_write(section->mr, addr1, val, 1);
3473 l = 1;
3474 }
3475 } else if (!section->readonly) {
3476 ram_addr_t addr1;
3477 addr1 = memory_region_get_ram_addr(section->mr)
3478 + memory_region_section_addr(section, addr);
3479 /* RAM case */
3480 ptr = qemu_get_ram_ptr(addr1);
3481 memcpy(ptr, buf, l);
3482 invalidate_and_set_dirty(addr1, l);
3483 qemu_put_ram_ptr(ptr);
3484 }
3485 } else {
3486 if (!(memory_region_is_ram(section->mr) ||
3487 memory_region_is_romd(section->mr))) {
3488 target_phys_addr_t addr1;
3489 /* I/O case */
3490 addr1 = memory_region_section_addr(section, addr);
3491 if (l >= 4 && ((addr1 & 3) == 0)) {
3492 /* 32 bit read access */
3493 val = io_mem_read(section->mr, addr1, 4);
3494 stl_p(buf, val);
3495 l = 4;
3496 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3497 /* 16 bit read access */
3498 val = io_mem_read(section->mr, addr1, 2);
3499 stw_p(buf, val);
3500 l = 2;
3501 } else {
3502 /* 8 bit read access */
3503 val = io_mem_read(section->mr, addr1, 1);
3504 stb_p(buf, val);
3505 l = 1;
3506 }
3507 } else {
3508 /* RAM case */
3509 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3510 + memory_region_section_addr(section,
3511 addr));
3512 memcpy(buf, ptr, l);
3513 qemu_put_ram_ptr(ptr);
3514 }
3515 }
3516 len -= l;
3517 buf += l;
3518 addr += l;
3519 }
3520 }
3521
3522 /* used for ROM loading: can write to both RAM and ROM */
3523 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3524 const uint8_t *buf, int len)
3525 {
3526 int l;
3527 uint8_t *ptr;
3528 target_phys_addr_t page;
3529 MemoryRegionSection *section;
3530
3531 while (len > 0) {
3532 page = addr & TARGET_PAGE_MASK;
3533 l = (page + TARGET_PAGE_SIZE) - addr;
3534 if (l > len)
3535 l = len;
3536 section = phys_page_find(page >> TARGET_PAGE_BITS);
3537
3538 if (!(memory_region_is_ram(section->mr) ||
3539 memory_region_is_romd(section->mr))) {
3540 /* do nothing */
3541 } else {
3542 unsigned long addr1;
3543 addr1 = memory_region_get_ram_addr(section->mr)
3544 + memory_region_section_addr(section, addr);
3545 /* ROM/RAM case */
3546 ptr = qemu_get_ram_ptr(addr1);
3547 memcpy(ptr, buf, l);
3548 invalidate_and_set_dirty(addr1, l);
3549 qemu_put_ram_ptr(ptr);
3550 }
3551 len -= l;
3552 buf += l;
3553 addr += l;
3554 }
3555 }
3556
3557 typedef struct {
3558 void *buffer;
3559 target_phys_addr_t addr;
3560 target_phys_addr_t len;
3561 } BounceBuffer;
3562
3563 static BounceBuffer bounce;
3564
3565 typedef struct MapClient {
3566 void *opaque;
3567 void (*callback)(void *opaque);
3568 QLIST_ENTRY(MapClient) link;
3569 } MapClient;
3570
3571 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3572 = QLIST_HEAD_INITIALIZER(map_client_list);
3573
3574 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3575 {
3576 MapClient *client = g_malloc(sizeof(*client));
3577
3578 client->opaque = opaque;
3579 client->callback = callback;
3580 QLIST_INSERT_HEAD(&map_client_list, client, link);
3581 return client;
3582 }
3583
3584 void cpu_unregister_map_client(void *_client)
3585 {
3586 MapClient *client = (MapClient *)_client;
3587
3588 QLIST_REMOVE(client, link);
3589 g_free(client);
3590 }
3591
3592 static void cpu_notify_map_clients(void)
3593 {
3594 MapClient *client;
3595
3596 while (!QLIST_EMPTY(&map_client_list)) {
3597 client = QLIST_FIRST(&map_client_list);
3598 client->callback(client->opaque);
3599 cpu_unregister_map_client(client);
3600 }
3601 }
3602
3603 /* Map a physical memory region into a host virtual address.
3604 * May map a subset of the requested range, given by and returned in *plen.
3605 * May return NULL if resources needed to perform the mapping are exhausted.
3606 * Use only for reads OR writes - not for read-modify-write operations.
3607 * Use cpu_register_map_client() to know when retrying the map operation is
3608 * likely to succeed.
3609 */
3610 void *cpu_physical_memory_map(target_phys_addr_t addr,
3611 target_phys_addr_t *plen,
3612 int is_write)
3613 {
3614 target_phys_addr_t len = *plen;
3615 target_phys_addr_t todo = 0;
3616 int l;
3617 target_phys_addr_t page;
3618 MemoryRegionSection *section;
3619 ram_addr_t raddr = RAM_ADDR_MAX;
3620 ram_addr_t rlen;
3621 void *ret;
3622
3623 while (len > 0) {
3624 page = addr & TARGET_PAGE_MASK;
3625 l = (page + TARGET_PAGE_SIZE) - addr;
3626 if (l > len)
3627 l = len;
3628 section = phys_page_find(page >> TARGET_PAGE_BITS);
3629
3630 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3631 if (todo || bounce.buffer) {
3632 break;
3633 }
3634 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3635 bounce.addr = addr;
3636 bounce.len = l;
3637 if (!is_write) {
3638 cpu_physical_memory_read(addr, bounce.buffer, l);
3639 }
3640
3641 *plen = l;
3642 return bounce.buffer;
3643 }
3644 if (!todo) {
3645 raddr = memory_region_get_ram_addr(section->mr)
3646 + memory_region_section_addr(section, addr);
3647 }
3648
3649 len -= l;
3650 addr += l;
3651 todo += l;
3652 }
3653 rlen = todo;
3654 ret = qemu_ram_ptr_length(raddr, &rlen);
3655 *plen = rlen;
3656 return ret;
3657 }
3658
3659 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3660 * Will also mark the memory as dirty if is_write == 1. access_len gives
3661 * the amount of memory that was actually read or written by the caller.
3662 */
3663 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3664 int is_write, target_phys_addr_t access_len)
3665 {
3666 if (buffer != bounce.buffer) {
3667 if (is_write) {
3668 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3669 while (access_len) {
3670 unsigned l;
3671 l = TARGET_PAGE_SIZE;
3672 if (l > access_len)
3673 l = access_len;
3674 invalidate_and_set_dirty(addr1, l);
3675 addr1 += l;
3676 access_len -= l;
3677 }
3678 }
3679 if (xen_enabled()) {
3680 xen_invalidate_map_cache_entry(buffer);
3681 }
3682 return;
3683 }
3684 if (is_write) {
3685 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3686 }
3687 qemu_vfree(bounce.buffer);
3688 bounce.buffer = NULL;
3689 cpu_notify_map_clients();
3690 }
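
/* Editor's illustrative sketch (not part of the original file): the usual
 * map/unmap pairing for a DMA-style read of guest memory.  Real device code
 * must also cope with a shorter-than-requested mapping and with a NULL return
 * (bounce buffer exhausted), typically via cpu_register_map_client(); this
 * hypothetical helper only shows the happy path. */
#if 0   /* example only -- not compiled */
static void example_dma_read(target_phys_addr_t addr, void *dst,
                             target_phys_addr_t len)
{
    target_phys_addr_t plen = len;
    void *host = cpu_physical_memory_map(addr, &plen, 0 /* read */);

    if (host) {
        memcpy(dst, host, plen);                  /* plen may be < len */
        cpu_physical_memory_unmap(host, plen, 0, plen);
    }
}
#endif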
3691
3692 /* warning: addr must be aligned */
3693 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
3694 enum device_endian endian)
3695 {
3696 uint8_t *ptr;
3697 uint32_t val;
3698 MemoryRegionSection *section;
3699
3700 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3701
3702 if (!(memory_region_is_ram(section->mr) ||
3703 memory_region_is_romd(section->mr))) {
3704 /* I/O case */
3705 addr = memory_region_section_addr(section, addr);
3706 val = io_mem_read(section->mr, addr, 4);
3707 #if defined(TARGET_WORDS_BIGENDIAN)
3708 if (endian == DEVICE_LITTLE_ENDIAN) {
3709 val = bswap32(val);
3710 }
3711 #else
3712 if (endian == DEVICE_BIG_ENDIAN) {
3713 val = bswap32(val);
3714 }
3715 #endif
3716 } else {
3717 /* RAM case */
3718 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3719 & TARGET_PAGE_MASK)
3720 + memory_region_section_addr(section, addr));
3721 switch (endian) {
3722 case DEVICE_LITTLE_ENDIAN:
3723 val = ldl_le_p(ptr);
3724 break;
3725 case DEVICE_BIG_ENDIAN:
3726 val = ldl_be_p(ptr);
3727 break;
3728 default:
3729 val = ldl_p(ptr);
3730 break;
3731 }
3732 }
3733 return val;
3734 }
3735
3736 uint32_t ldl_phys(target_phys_addr_t addr)
3737 {
3738 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3739 }
3740
3741 uint32_t ldl_le_phys(target_phys_addr_t addr)
3742 {
3743 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3744 }
3745
3746 uint32_t ldl_be_phys(target_phys_addr_t addr)
3747 {
3748 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3749 }
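
/* Editor's illustrative sketch (not part of the original file): device and
 * board code picks the accessor matching the bus or device endianness rather
 * than the CPU-native ldl_phys().  The descriptor layout below is a
 * hypothetical example. */
#if 0   /* example only -- not compiled */
static uint64_t example_read_le_descriptor(target_phys_addr_t desc)
{
    /* A little-endian in-memory descriptor made of two 32-bit words. */
    uint32_t lo = ldl_le_phys(desc);
    uint32_t hi = ldl_le_phys(desc + 4);

    return ((uint64_t)hi << 32) | lo;
}
#endif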
3750
3751 /* warning: addr must be aligned */
3752 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
3753 enum device_endian endian)
3754 {
3755 uint8_t *ptr;
3756 uint64_t val;
3757 MemoryRegionSection *section;
3758
3759 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3760
3761 if (!(memory_region_is_ram(section->mr) ||
3762 memory_region_is_romd(section->mr))) {
3763 /* I/O case */
3764 addr = memory_region_section_addr(section, addr);
3765
3766 /* XXX: This is broken when device endian != cpu endian.
3767 Fix and add an "endian" variable check. */
3768 #ifdef TARGET_WORDS_BIGENDIAN
3769 val = io_mem_read(section->mr, addr, 4) << 32;
3770 val |= io_mem_read(section->mr, addr + 4, 4);
3771 #else
3772 val = io_mem_read(section->mr, addr, 4);
3773 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3774 #endif
3775 } else {
3776 /* RAM case */
3777 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3778 & TARGET_PAGE_MASK)
3779 + memory_region_section_addr(section, addr));
3780 switch (endian) {
3781 case DEVICE_LITTLE_ENDIAN:
3782 val = ldq_le_p(ptr);
3783 break;
3784 case DEVICE_BIG_ENDIAN:
3785 val = ldq_be_p(ptr);
3786 break;
3787 default:
3788 val = ldq_p(ptr);
3789 break;
3790 }
3791 }
3792 return val;
3793 }
3794
3795 uint64_t ldq_phys(target_phys_addr_t addr)
3796 {
3797 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3798 }
3799
3800 uint64_t ldq_le_phys(target_phys_addr_t addr)
3801 {
3802 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3803 }
3804
3805 uint64_t ldq_be_phys(target_phys_addr_t addr)
3806 {
3807 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3808 }
3809
3810 /* XXX: optimize */
3811 uint32_t ldub_phys(target_phys_addr_t addr)
3812 {
3813 uint8_t val;
3814 cpu_physical_memory_read(addr, &val, 1);
3815 return val;
3816 }
3817
3818 /* warning: addr must be aligned */
3819 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
3820 enum device_endian endian)
3821 {
3822 uint8_t *ptr;
3823 uint64_t val;
3824 MemoryRegionSection *section;
3825
3826 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3827
3828 if (!(memory_region_is_ram(section->mr) ||
3829 memory_region_is_romd(section->mr))) {
3830 /* I/O case */
3831 addr = memory_region_section_addr(section, addr);
3832 val = io_mem_read(section->mr, addr, 2);
3833 #if defined(TARGET_WORDS_BIGENDIAN)
3834 if (endian == DEVICE_LITTLE_ENDIAN) {
3835 val = bswap16(val);
3836 }
3837 #else
3838 if (endian == DEVICE_BIG_ENDIAN) {
3839 val = bswap16(val);
3840 }
3841 #endif
3842 } else {
3843 /* RAM case */
3844 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3845 & TARGET_PAGE_MASK)
3846 + memory_region_section_addr(section, addr));
3847 switch (endian) {
3848 case DEVICE_LITTLE_ENDIAN:
3849 val = lduw_le_p(ptr);
3850 break;
3851 case DEVICE_BIG_ENDIAN:
3852 val = lduw_be_p(ptr);
3853 break;
3854 default:
3855 val = lduw_p(ptr);
3856 break;
3857 }
3858 }
3859 return val;
3860 }
3861
3862 uint32_t lduw_phys(target_phys_addr_t addr)
3863 {
3864 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3865 }
3866
3867 uint32_t lduw_le_phys(target_phys_addr_t addr)
3868 {
3869 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3870 }
3871
3872 uint32_t lduw_be_phys(target_phys_addr_t addr)
3873 {
3874 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3875 }
3876
3877 /* warning: addr must be aligned. The RAM page is not marked as dirty
3878 and the code inside is not invalidated. This is useful when the dirty
3879 bits are used to track modified PTEs */
3880 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3881 {
3882 uint8_t *ptr;
3883 MemoryRegionSection *section;
3884
3885 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3886
3887 if (!memory_region_is_ram(section->mr) || section->readonly) {
3888 addr = memory_region_section_addr(section, addr);
3889 if (memory_region_is_ram(section->mr)) {
3890 section = &phys_sections[phys_section_rom];
3891 }
3892 io_mem_write(section->mr, addr, val, 4);
3893 } else {
3894 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3895 & TARGET_PAGE_MASK)
3896 + memory_region_section_addr(section, addr);
3897 ptr = qemu_get_ram_ptr(addr1);
3898 stl_p(ptr, val);
3899
3900 if (unlikely(in_migration)) {
3901 if (!cpu_physical_memory_is_dirty(addr1)) {
3902 /* invalidate code */
3903 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3904 /* set dirty bit */
3905 cpu_physical_memory_set_dirty_flags(
3906 addr1, (0xff & ~CODE_DIRTY_FLAG));
3907 }
3908 }
3909 }
3910 }
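
/* Editor's illustrative sketch (not part of the original file): target MMU
 * helpers use stl_phys_notdirty() when updating accessed/dirty bits in a
 * guest PTE, so that the update itself does not mark the page dirty or
 * invalidate translated code.  The bit value below is a hypothetical
 * placeholder, not any particular architecture's PTE format. */
#if 0   /* example only -- not compiled */
static void example_set_pte_accessed(target_phys_addr_t pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    pte |= 0x20;                        /* hypothetical "accessed" bit */
    stl_phys_notdirty(pte_addr, pte);
}
#endif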
3911
3912 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
3913 {
3914 uint8_t *ptr;
3915 MemoryRegionSection *section;
3916
3917 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3918
3919 if (!memory_region_is_ram(section->mr) || section->readonly) {
3920 addr = memory_region_section_addr(section, addr);
3921 if (memory_region_is_ram(section->mr)) {
3922 section = &phys_sections[phys_section_rom];
3923 }
3924 #ifdef TARGET_WORDS_BIGENDIAN
3925 io_mem_write(section->mr, addr, val >> 32, 4);
3926 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3927 #else
3928 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3929 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3930 #endif
3931 } else {
3932 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3933 & TARGET_PAGE_MASK)
3934 + memory_region_section_addr(section, addr));
3935 stq_p(ptr, val);
3936 }
3937 }
3938
3939 /* warning: addr must be aligned */
3940 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
3941 enum device_endian endian)
3942 {
3943 uint8_t *ptr;
3944 MemoryRegionSection *section;
3945
3946 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3947
3948 if (!memory_region_is_ram(section->mr) || section->readonly) {
3949 addr = memory_region_section_addr(section, addr);
3950 if (memory_region_is_ram(section->mr)) {
3951 section = &phys_sections[phys_section_rom];
3952 }
3953 #if defined(TARGET_WORDS_BIGENDIAN)
3954 if (endian == DEVICE_LITTLE_ENDIAN) {
3955 val = bswap32(val);
3956 }
3957 #else
3958 if (endian == DEVICE_BIG_ENDIAN) {
3959 val = bswap32(val);
3960 }
3961 #endif
3962 io_mem_write(section->mr, addr, val, 4);
3963 } else {
3964 unsigned long addr1;
3965 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3966 + memory_region_section_addr(section, addr);
3967 /* RAM case */
3968 ptr = qemu_get_ram_ptr(addr1);
3969 switch (endian) {
3970 case DEVICE_LITTLE_ENDIAN:
3971 stl_le_p(ptr, val);
3972 break;
3973 case DEVICE_BIG_ENDIAN:
3974 stl_be_p(ptr, val);
3975 break;
3976 default:
3977 stl_p(ptr, val);
3978 break;
3979 }
3980 invalidate_and_set_dirty(addr1, 4);
3981 }
3982 }
3983
3984 void stl_phys(target_phys_addr_t addr, uint32_t val)
3985 {
3986 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3987 }
3988
3989 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
3990 {
3991 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3992 }
3993
3994 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
3995 {
3996 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3997 }
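
/*
 * Rough guide to picking a variant: device models generally follow the
 * endianness of the bus or device they emulate rather than the guest
 * CPU.  A hypothetical little-endian DMA descriptor update might look
 * like (desc_addr and STATUS_DONE are placeholders):
 *
 *     stl_le_phys(desc_addr + 8, STATUS_DONE);
 *
 * stl_phys() stores in the target's native byte order and stl_be_phys()
 * forces big-endian layout regardless of the target.
 */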
3998
3999 /* XXX: optimize */
4000 void stb_phys(target_phys_addr_t addr, uint32_t val)
4001 {
4002 uint8_t v = val;
4003 cpu_physical_memory_write(addr, &v, 1);
4004 }
4005
4006 /* warning: addr must be aligned */
4007 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4008 enum device_endian endian)
4009 {
4010 uint8_t *ptr;
4011 MemoryRegionSection *section;
4012
4013 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4014
4015 if (!memory_region_is_ram(section->mr) || section->readonly) {
4016 addr = memory_region_section_addr(section, addr);
4017 if (memory_region_is_ram(section->mr)) {
4018 section = &phys_sections[phys_section_rom];
4019 }
4020 #if defined(TARGET_WORDS_BIGENDIAN)
4021 if (endian == DEVICE_LITTLE_ENDIAN) {
4022 val = bswap16(val);
4023 }
4024 #else
4025 if (endian == DEVICE_BIG_ENDIAN) {
4026 val = bswap16(val);
4027 }
4028 #endif
4029 io_mem_write(section->mr, addr, val, 2);
4030 } else {
4031 unsigned long addr1;
4032 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4033 + memory_region_section_addr(section, addr);
4034 /* RAM case */
4035 ptr = qemu_get_ram_ptr(addr1);
4036 switch (endian) {
4037 case DEVICE_LITTLE_ENDIAN:
4038 stw_le_p(ptr, val);
4039 break;
4040 case DEVICE_BIG_ENDIAN:
4041 stw_be_p(ptr, val);
4042 break;
4043 default:
4044 stw_p(ptr, val);
4045 break;
4046 }
4047 invalidate_and_set_dirty(addr1, 2);
4048 }
4049 }
4050
4051 void stw_phys(target_phys_addr_t addr, uint32_t val)
4052 {
4053 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4054 }
4055
4056 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4057 {
4058 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4059 }
4060
4061 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4062 {
4063 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4064 }
4065
4066 /* XXX: optimize */
4067 void stq_phys(target_phys_addr_t addr, uint64_t val)
4068 {
4069 val = tswap64(val);
4070 cpu_physical_memory_write(addr, &val, 8);
4071 }
4072
4073 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4074 {
4075 val = cpu_to_le64(val);
4076 cpu_physical_memory_write(addr, &val, 8);
4077 }
4078
4079 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4080 {
4081 val = cpu_to_be64(val);
4082 cpu_physical_memory_write(addr, &val, 8);
4083 }
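
/*
 * Worked example: stq_le_phys(a, 0x0102030405060708ULL) leaves the
 * bytes 08 07 06 05 04 03 02 01 at guest physical address a, while
 * stq_be_phys() with the same value leaves 01 02 03 04 05 06 07 08.
 * stq_phys() applies tswap64(), i.e. it swaps only when host and
 * target byte order differ, yielding the target's native layout.
 */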
4084
4085 /* virtual memory access for debug (includes writing to ROM) */
4086 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4087 uint8_t *buf, int len, int is_write)
4088 {
4089 int l;
4090 target_phys_addr_t phys_addr;
4091 target_ulong page;
4092
4093 while (len > 0) {
4094 page = addr & TARGET_PAGE_MASK;
4095 phys_addr = cpu_get_phys_page_debug(env, page);
4096 /* if no physical page mapped, return an error */
4097 if (phys_addr == -1)
4098 return -1;
4099 l = (page + TARGET_PAGE_SIZE) - addr;
4100 if (l > len)
4101 l = len;
4102 phys_addr += (addr & ~TARGET_PAGE_MASK);
4103 if (is_write)
4104 cpu_physical_memory_write_rom(phys_addr, buf, l);
4105 else
4106 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4107 len -= l;
4108 buf += l;
4109 addr += l;
4110 }
4111 return 0;
4112 }
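
/*
 * Usage sketch: a debugger-style caller (a gdb stub memory request, for
 * instance) reads guest *virtual* memory through this helper, which
 * walks it page by page:
 *
 *     uint32_t insn;
 *     if (cpu_memory_rw_debug(env, pc, (uint8_t *)&insn,
 *                             sizeof(insn), 0) < 0) {
 *         ... nothing mapped at that virtual address ...
 *     }
 *
 * Writes are routed through cpu_physical_memory_write_rom() so that
 * software breakpoints can be planted even in memory the guest sees
 * as ROM.
 */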
4113 #endif
4114
4115 /* In deterministic execution mode, instructions that perform device I/O
4116 must be the last instruction in the TB. */
4117 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4118 {
4119 TranslationBlock *tb;
4120 uint32_t n, cflags;
4121 target_ulong pc, cs_base;
4122 uint64_t flags;
4123
4124 tb = tb_find_pc(retaddr);
4125 if (!tb) {
4126 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4127 (void *)retaddr);
4128 }
4129 n = env->icount_decr.u16.low + tb->icount;
4130 cpu_restore_state(tb, env, retaddr);
4131 /* Calculate how many instructions had been executed before the fault
4132 occurred. */
4133 n = n - env->icount_decr.u16.low;
4134 /* Generate a new TB ending on the I/O insn. */
4135 n++;
4136 /* On MIPS and SH, delay slot instructions can only be restarted if
4137 they were already the first instruction in the TB. If this is not
4138 the first instruction in a TB then re-execute the preceding
4139 branch. */
4140 #if defined(TARGET_MIPS)
4141 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4142 env->active_tc.PC -= 4;
4143 env->icount_decr.u16.low++;
4144 env->hflags &= ~MIPS_HFLAG_BMASK;
4145 }
4146 #elif defined(TARGET_SH4)
4147 if ((env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) != 0
4148 && n > 1) {
4149 env->pc -= 2;
4150 env->icount_decr.u16.low++;
4151 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4152 }
4153 #endif
4154 /* This should never happen. */
4155 if (n > CF_COUNT_MASK)
4156 cpu_abort(env, "TB too big during recompile");
4157
4158 cflags = n | CF_LAST_IO;
4159 pc = tb->pc;
4160 cs_base = tb->cs_base;
4161 flags = tb->flags;
4162 tb_phys_invalidate(tb, -1);
4163 /* FIXME: In theory this could raise an exception. In practice
4164 we have already translated the block once so it's probably ok. */
4165 tb_gen_code(env, pc, cs_base, flags, cflags);
4166 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4167 the first in the TB) then we end up generating a whole new TB and
4168 repeating the fault, which is horribly inefficient.
4169 Better would be to execute just this insn uncached, or generate a
4170 second new TB. */
4171 cpu_resume_from_signal(env, NULL);
4172 }
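
/*
 * Callers reach this from an I/O access helper when precise instruction
 * counting is enabled and the access was not the last instruction of
 * the TB, roughly:
 *
 *     if (!can_do_io(env)) {
 *         cpu_io_recompile(env, GETPC());
 *     }
 *
 * can_do_io() stands in here for the caller's own check; GETPC()
 * supplies the host return address used to locate the current TB.  The
 * call never returns to its caller: after regenerating the TB with
 * CF_LAST_IO it jumps back into the execution loop via
 * cpu_resume_from_signal().
 */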
4173
4174 #if !defined(CONFIG_USER_ONLY)
4175
4176 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4177 {
4178 int i, target_code_size, max_target_code_size;
4179 int direct_jmp_count, direct_jmp2_count, cross_page;
4180 TranslationBlock *tb;
4181
4182 target_code_size = 0;
4183 max_target_code_size = 0;
4184 cross_page = 0;
4185 direct_jmp_count = 0;
4186 direct_jmp2_count = 0;
4187 for (i = 0; i < nb_tbs; i++) {
4188 tb = &tbs[i];
4189 target_code_size += tb->size;
4190 if (tb->size > max_target_code_size)
4191 max_target_code_size = tb->size;
4192 if (tb->page_addr[1] != -1)
4193 cross_page++;
4194 if (tb->tb_next_offset[0] != 0xffff) {
4195 direct_jmp_count++;
4196 if (tb->tb_next_offset[1] != 0xffff) {
4197 direct_jmp2_count++;
4198 }
4199 }
4200 }
4201 /* XXX: avoid using doubles? */
4202 cpu_fprintf(f, "Translation buffer state:\n");
4203 cpu_fprintf(f, "gen code size %td/%zd\n",
4204 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4205 cpu_fprintf(f, "TB count %d/%d\n",
4206 nb_tbs, code_gen_max_blocks);
4207 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4208 nb_tbs ? target_code_size / nb_tbs : 0,
4209 max_target_code_size);
4210 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4211 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4212 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4213 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4214 cross_page,
4215 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4216 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4217 direct_jmp_count,
4218 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4219 direct_jmp2_count,
4220 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4221 cpu_fprintf(f, "\nStatistics:\n");
4222 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4223 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4224 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4225 tcg_dump_info(f, cpu_fprintf);
4226 }
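
/*
 * This report is what typically backs the "info jit" monitor command:
 * a "Translation buffer state" block (code buffer usage, TB counts and
 * sizes, cross-page and direct-jump ratios) followed by a "Statistics"
 * block (TB flush/invalidate and TLB flush counts), with TCG's own
 * counters appended by tcg_dump_info().
 */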
4227
4228 /*
4229 * A helper function for the _utterly broken_ virtio device model to find out if
4230 * it's running on a big-endian machine. Don't do this at home, kids!
4231 */
4232 bool virtio_is_big_endian(void);
4233 bool virtio_is_big_endian(void)
4234 {
4235 #if defined(TARGET_WORDS_BIGENDIAN)
4236 return true;
4237 #else
4238 return false;
4239 #endif
4240 }
4241
4242 #endif
4243
4244 #ifndef CONFIG_USER_ONLY
4245 bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
4246 {
4247 MemoryRegionSection *section;
4248
4249 section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);
4250
4251 return !(memory_region_is_ram(section->mr) ||
4252 memory_region_is_romd(section->mr));
4253 }
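
/*
 * Usage sketch: callers use this predicate to tell RAM/ROM-device
 * backed guest physical pages apart from MMIO before deciding how to
 * handle an access, e.g. (gpa is a placeholder for an address the
 * caller has already translated):
 *
 *     if (cpu_physical_memory_is_io(gpa)) {
 *         ... route the access as device I/O ...
 *     } else {
 *         ... the page has RAM backing that can be mapped directly ...
 *     }
 */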
4254 #endif