exec: Do not use absolute address hints for code_gen_buffer with -fpie
1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
59
60 #include "cputlb.h"
61
62 #define WANT_EXEC_OBSOLETE
63 #include "exec-obsolete.h"
64
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_UNASSIGNED
68
69 /* make various TB consistency checks */
70 //#define DEBUG_TB_CHECK
71
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
74
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
79
80 #define SMC_BITMAP_USE_THRESHOLD 10
81
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88
89 #if defined(__arm__) || defined(__sparc__)
90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
91 have limited branch ranges (possibly also PPC), so place it in a
92 section close to the code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32) && !defined(_WIN64)
97 #define code_gen_section \
98 __attribute__((aligned (16)))
99 #else
100 #define code_gen_section \
101 __attribute__((aligned (32)))
102 #endif
103
104 uint8_t code_gen_prologue[1024] code_gen_section;
105 static uint8_t *code_gen_buffer;
106 static size_t code_gen_buffer_size;
107 /* threshold to flush the translated code buffer */
108 static size_t code_gen_buffer_max_size;
109 static uint8_t *code_gen_ptr;
110
111 #if !defined(CONFIG_USER_ONLY)
112 int phys_ram_fd;
113 static int in_migration;
114
115 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
116
117 static MemoryRegion *system_memory;
118 static MemoryRegion *system_io;
119
120 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
121 static MemoryRegion io_mem_subpage_ram;
122
123 #endif
124
125 CPUArchState *first_cpu;
126 /* current CPU in the current thread. It is only valid inside
127 cpu_exec() */
128 DEFINE_TLS(CPUArchState *,cpu_single_env);
129 /* 0 = Do not count executed instructions.
130 1 = Precise instruction counting.
131 2 = Adaptive rate instruction counting. */
132 int use_icount = 0;
133
134 typedef struct PageDesc {
135 /* list of TBs intersecting this ram page */
136 TranslationBlock *first_tb;
137 /* in order to optimize handling of self-modifying code, we count the
138 write accesses to a given page and switch to a bitmap past a threshold */
139 unsigned int code_write_count;
140 uint8_t *code_bitmap;
141 #if defined(CONFIG_USER_ONLY)
142 unsigned long flags;
143 #endif
144 } PageDesc;
145
146 /* In system mode we want L1_MAP to be based on ram offsets,
147 while in user mode we want it to be based on virtual addresses. */
148 #if !defined(CONFIG_USER_ONLY)
149 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
150 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
151 #else
152 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
153 #endif
154 #else
155 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
156 #endif
157
158 /* Size of the L2 (and L3, etc) page tables. */
159 #define L2_BITS 10
160 #define L2_SIZE (1 << L2_BITS)
161
162 #define P_L2_LEVELS \
163 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
164
165 /* The bits remaining after N lower levels of page tables. */
166 #define V_L1_BITS_REM \
167 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
168
169 #if V_L1_BITS_REM < 4
170 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
171 #else
172 #define V_L1_BITS V_L1_BITS_REM
173 #endif
174
175 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
176
177 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
178
179 uintptr_t qemu_real_host_page_size;
180 uintptr_t qemu_host_page_size;
181 uintptr_t qemu_host_page_mask;
182
183 /* This is a multi-level map on the virtual address space.
184 The bottom level has pointers to PageDesc. */
185 static void *l1_map[V_L1_SIZE];
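/* Illustrative example (the concrete values depend on the configured target;
   assume L1_MAP_ADDR_SPACE_BITS == 36 and TARGET_PAGE_BITS == 12 purely for
   the arithmetic): 24 bits of page index remain, V_L1_BITS_REM == 24 % 10 == 4,
   so V_L1_BITS == 4, V_L1_SIZE == 16 and V_L1_SHIFT == 20.  page_find_alloc()
   below then splits a page index as [4 | 10 | 10]: the top 4 bits select an
   l1_map slot, the next 10 bits an intermediate pointer table entry, and the
   low 10 bits the PageDesc within the leaf array. */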
186
187 #if !defined(CONFIG_USER_ONLY)
188 typedef struct PhysPageEntry PhysPageEntry;
189
190 static MemoryRegionSection *phys_sections;
191 static unsigned phys_sections_nb, phys_sections_nb_alloc;
192 static uint16_t phys_section_unassigned;
193 static uint16_t phys_section_notdirty;
194 static uint16_t phys_section_rom;
195 static uint16_t phys_section_watch;
196
197 struct PhysPageEntry {
198 uint16_t is_leaf : 1;
199 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
200 uint16_t ptr : 15;
201 };
202
203 /* Simple allocator for PhysPageEntry nodes */
204 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
205 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
206
207 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
208
209 /* This is a multi-level map on the physical address space.
210 The bottom level has pointers to MemoryRegionSections. */
211 static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
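/* Each PhysPageEntry is therefore either a leaf (ptr indexes phys_sections)
   or an interior node (ptr indexes phys_map_nodes), with PHYS_MAP_NODE_NIL
   marking a subtree that has not been populated yet.  phys_map is the root
   of this radix tree: phys_page_set() grows it and phys_page_find() walks it. */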
212
213 static void io_mem_init(void);
214 static void memory_map_init(void);
215
216 static MemoryRegion io_mem_watch;
217 #endif
218
219 /* statistics */
220 static int tb_flush_count;
221 static int tb_phys_invalidate_count;
222
223 #ifdef _WIN32
224 static void map_exec(void *addr, long size)
225 {
226 DWORD old_protect;
227 VirtualProtect(addr, size,
228 PAGE_EXECUTE_READWRITE, &old_protect);
229
230 }
231 #else
232 static void map_exec(void *addr, long size)
233 {
234 unsigned long start, end, page_size;
235
236 page_size = getpagesize();
237 start = (unsigned long)addr;
238 start &= ~(page_size - 1);
239
240 end = (unsigned long)addr + size;
241 end += page_size - 1;
242 end &= ~(page_size - 1);
243
244 mprotect((void *)start, end - start,
245 PROT_READ | PROT_WRITE | PROT_EXEC);
246 }
247 #endif
248
249 static void page_init(void)
250 {
251 /* NOTE: we can always assume that qemu_host_page_size >=
252 TARGET_PAGE_SIZE */
253 #ifdef _WIN32
254 {
255 SYSTEM_INFO system_info;
256
257 GetSystemInfo(&system_info);
258 qemu_real_host_page_size = system_info.dwPageSize;
259 }
260 #else
261 qemu_real_host_page_size = getpagesize();
262 #endif
263 if (qemu_host_page_size == 0)
264 qemu_host_page_size = qemu_real_host_page_size;
265 if (qemu_host_page_size < TARGET_PAGE_SIZE)
266 qemu_host_page_size = TARGET_PAGE_SIZE;
267 qemu_host_page_mask = ~(qemu_host_page_size - 1);
268
269 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
270 {
271 #ifdef HAVE_KINFO_GETVMMAP
272 struct kinfo_vmentry *freep;
273 int i, cnt;
274
275 freep = kinfo_getvmmap(getpid(), &cnt);
276 if (freep) {
277 mmap_lock();
278 for (i = 0; i < cnt; i++) {
279 unsigned long startaddr, endaddr;
280
281 startaddr = freep[i].kve_start;
282 endaddr = freep[i].kve_end;
283 if (h2g_valid(startaddr)) {
284 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
285
286 if (h2g_valid(endaddr)) {
287 endaddr = h2g(endaddr);
288 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
289 } else {
290 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
291 endaddr = ~0ul;
292 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
293 #endif
294 }
295 }
296 }
297 free(freep);
298 mmap_unlock();
299 }
300 #else
301 FILE *f;
302
303 last_brk = (unsigned long)sbrk(0);
304
305 f = fopen("/compat/linux/proc/self/maps", "r");
306 if (f) {
307 mmap_lock();
308
309 do {
310 unsigned long startaddr, endaddr;
311 int n;
312
313 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
314
315 if (n == 2 && h2g_valid(startaddr)) {
316 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
317
318 if (h2g_valid(endaddr)) {
319 endaddr = h2g(endaddr);
320 } else {
321 endaddr = ~0ul;
322 }
323 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
324 }
325 } while (!feof(f));
326
327 fclose(f);
328 mmap_unlock();
329 }
330 #endif
331 }
332 #endif
333 }
334
335 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
336 {
337 PageDesc *pd;
338 void **lp;
339 int i;
340
341 #if defined(CONFIG_USER_ONLY)
342 /* We can't use g_malloc because it may recurse into a locked mutex. */
343 # define ALLOC(P, SIZE) \
344 do { \
345 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
346 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
347 } while (0)
348 #else
349 # define ALLOC(P, SIZE) \
350 do { P = g_malloc0(SIZE); } while (0)
351 #endif
352
353 /* Level 1. Always allocated. */
354 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
355
356 /* Level 2..N-1. */
357 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
358 void **p = *lp;
359
360 if (p == NULL) {
361 if (!alloc) {
362 return NULL;
363 }
364 ALLOC(p, sizeof(void *) * L2_SIZE);
365 *lp = p;
366 }
367
368 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
369 }
370
371 pd = *lp;
372 if (pd == NULL) {
373 if (!alloc) {
374 return NULL;
375 }
376 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
377 *lp = pd;
378 }
379
380 #undef ALLOC
381
382 return pd + (index & (L2_SIZE - 1));
383 }
384
385 static inline PageDesc *page_find(tb_page_addr_t index)
386 {
387 return page_find_alloc(index, 0);
388 }
389
390 #if !defined(CONFIG_USER_ONLY)
391
392 static void phys_map_node_reserve(unsigned nodes)
393 {
394 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
395 typedef PhysPageEntry Node[L2_SIZE];
396 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
397 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
398 phys_map_nodes_nb + nodes);
399 phys_map_nodes = g_renew(Node, phys_map_nodes,
400 phys_map_nodes_nb_alloc);
401 }
402 }
403
404 static uint16_t phys_map_node_alloc(void)
405 {
406 unsigned i;
407 uint16_t ret;
408
409 ret = phys_map_nodes_nb++;
410 assert(ret != PHYS_MAP_NODE_NIL);
411 assert(ret != phys_map_nodes_nb_alloc);
412 for (i = 0; i < L2_SIZE; ++i) {
413 phys_map_nodes[ret][i].is_leaf = 0;
414 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
415 }
416 return ret;
417 }
418
419 static void phys_map_nodes_reset(void)
420 {
421 phys_map_nodes_nb = 0;
422 }
423
424
425 static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
426 target_phys_addr_t *nb, uint16_t leaf,
427 int level)
428 {
429 PhysPageEntry *p;
430 int i;
431 target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
432
433 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
434 lp->ptr = phys_map_node_alloc();
435 p = phys_map_nodes[lp->ptr];
436 if (level == 0) {
437 for (i = 0; i < L2_SIZE; i++) {
438 p[i].is_leaf = 1;
439 p[i].ptr = phys_section_unassigned;
440 }
441 }
442 } else {
443 p = phys_map_nodes[lp->ptr];
444 }
445 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
446
447 while (*nb && lp < &p[L2_SIZE]) {
448 if ((*index & (step - 1)) == 0 && *nb >= step) {
449 lp->is_leaf = true;
450 lp->ptr = leaf;
451 *index += step;
452 *nb -= step;
453 } else {
454 phys_page_set_level(lp, index, nb, leaf, level - 1);
455 }
456 ++lp;
457 }
458 }
459
460 static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
461 uint16_t leaf)
462 {
463 /* Wildly overreserve - it doesn't matter much. */
464 phys_map_node_reserve(3 * P_L2_LEVELS);
465
466 phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
467 }
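/* Note on the recursion above: phys_page_set_level() descends one level per
   call.  Whenever the remaining range is aligned to and at least as large as
   the current level's step (1 << (level * L2_BITS) pages), the whole subtree
   is recorded as a single leaf entry; otherwise it recurses one level down
   for the unaligned or short remainder. */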
468
469 MemoryRegionSection *phys_page_find(target_phys_addr_t index)
470 {
471 PhysPageEntry lp = phys_map;
472 PhysPageEntry *p;
473 int i;
474 uint16_t s_index = phys_section_unassigned;
475
476 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
477 if (lp.ptr == PHYS_MAP_NODE_NIL) {
478 goto not_found;
479 }
480 p = phys_map_nodes[lp.ptr];
481 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
482 }
483
484 s_index = lp.ptr;
485 not_found:
486 return &phys_sections[s_index];
487 }
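/* Typical use (see e.g. tb_invalidate_phys_addr() below):
 *     section = phys_page_find(addr >> TARGET_PAGE_BITS);
 * Pages that were never registered resolve to the phys_section_unassigned
 * entry rather than to a NULL pointer. */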
488
489 bool memory_region_is_unassigned(MemoryRegion *mr)
490 {
491 return mr != &io_mem_ram && mr != &io_mem_rom
492 && mr != &io_mem_notdirty && !mr->rom_device
493 && mr != &io_mem_watch;
494 }
495
496 #define mmap_lock() do { } while(0)
497 #define mmap_unlock() do { } while(0)
498 #endif
499
500 #if defined(CONFIG_USER_ONLY)
501 /* Currently it is not recommended to allocate big chunks of data in
502 user mode. This will change when a dedicated libc is used. */
503 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
504 region in which the guest needs to run. Revisit this. */
505 #define USE_STATIC_CODE_GEN_BUFFER
506 #endif
507
508 /* ??? Should configure for this, not list operating systems here. */
509 #if (defined(__linux__) \
510 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
511 || defined(__DragonFly__) || defined(__OpenBSD__) \
512 || defined(__NetBSD__))
513 # define USE_MMAP
514 #endif
515
516 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
517 indicated, this is constrained by the range of direct branches on the
518 host cpu, as used by the TCG implementation of goto_tb. */
519 #if defined(__x86_64__)
520 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
521 #elif defined(__sparc__)
522 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
523 #elif defined(__arm__)
524 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
525 #elif defined(__s390x__)
526 /* We have a +- 4GB range on the branches; leave some slop. */
527 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
528 #else
529 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
530 #endif
531
532 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
533
534 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
535 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
536 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
537
538 static inline size_t size_code_gen_buffer(size_t tb_size)
539 {
540 /* Size the buffer. */
541 if (tb_size == 0) {
542 #ifdef USE_STATIC_CODE_GEN_BUFFER
543 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
544 #else
545 /* ??? Needs adjustments. */
546 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
547 static buffer, we could size this on RESERVED_VA, on the text
548 segment size of the executable, or continue to use the default. */
549 tb_size = (unsigned long)(ram_size / 4);
550 #endif
551 }
552 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
553 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
554 }
555 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
556 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
557 }
558 code_gen_buffer_size = tb_size;
559 return tb_size;
560 }
561
562 #ifdef USE_STATIC_CODE_GEN_BUFFER
563 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
564 __attribute__((aligned(CODE_GEN_ALIGN)));
565
566 static inline void *alloc_code_gen_buffer(void)
567 {
568 map_exec(static_code_gen_buffer, code_gen_buffer_size);
569 return static_code_gen_buffer;
570 }
571 #elif defined(USE_MMAP)
572 static inline void *alloc_code_gen_buffer(void)
573 {
574 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
575 uintptr_t start = 0;
576 void *buf;
577
578 /* Constrain the position of the buffer based on the host cpu.
579 Note that these addresses are chosen in concert with the
580 addresses assigned in the relevant linker script file. */
581 # if defined(__PIE__) || defined(__PIC__)
582 /* Don't bother setting a preferred location if we're building
583 a position-independent executable. We're more likely to get
584 an address near the main executable if we let the kernel
585 choose the address. */
586 # elif defined(__x86_64__) && defined(MAP_32BIT)
587 /* Force the memory down into low memory with the executable.
588 Leave the choice of exact location with the kernel. */
589 flags |= MAP_32BIT;
590 /* Cannot expect to map more than 800MB in low memory. */
591 if (code_gen_buffer_size > 800u * 1024 * 1024) {
592 code_gen_buffer_size = 800u * 1024 * 1024;
593 }
594 # elif defined(__sparc__)
595 start = 0x40000000ul;
596 # elif defined(__s390x__)
597 start = 0x90000000ul;
598 # endif
599
600 buf = mmap((void *)start, code_gen_buffer_size,
601 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
602 return buf == MAP_FAILED ? NULL : buf;
603 }
604 #else
605 static inline void *alloc_code_gen_buffer(void)
606 {
607 void *buf = g_malloc(code_gen_buffer_size);
608 if (buf) {
609 map_exec(buf, code_gen_buffer_size);
610 }
611 return buf;
612 }
613 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
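/* Summary of the three strategies above: CONFIG_USER_ONLY builds use the
 * static buffer; hosts with mmap() get an anonymous executable mapping,
 * optionally constrained to a host-cpu-specific address range (not when
 * building with -fpie/-fPIC, where the kernel's placement is accepted
 * as-is); everything else falls back to g_malloc() plus map_exec(). */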
614
615 static inline void code_gen_alloc(size_t tb_size)
616 {
617 code_gen_buffer_size = size_code_gen_buffer(tb_size);
618 code_gen_buffer = alloc_code_gen_buffer();
619 if (code_gen_buffer == NULL) {
620 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
621 exit(1);
622 }
623
624 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
625 code_gen_buffer_max_size = code_gen_buffer_size -
626 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
627 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
628 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
629 }
630
631 /* Must be called before using the QEMU cpus. 'tb_size' is the size
632 (in bytes) allocated to the translation buffer. Zero means default
633 size. */
634 void tcg_exec_init(unsigned long tb_size)
635 {
636 cpu_gen_init();
637 code_gen_alloc(tb_size);
638 code_gen_ptr = code_gen_buffer;
639 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
640 page_init();
641 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
642 /* There's no guest base to take into account, so go ahead and
643 initialize the prologue now. */
644 tcg_prologue_init(&tcg_ctx);
645 #endif
646 }
647
648 bool tcg_enabled(void)
649 {
650 return code_gen_buffer != NULL;
651 }
652
653 void cpu_exec_init_all(void)
654 {
655 #if !defined(CONFIG_USER_ONLY)
656 memory_map_init();
657 io_mem_init();
658 #endif
659 }
660
661 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
662
663 static int cpu_common_post_load(void *opaque, int version_id)
664 {
665 CPUArchState *env = opaque;
666
667 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
668 version_id is increased. */
669 env->interrupt_request &= ~0x01;
670 tlb_flush(env, 1);
671
672 return 0;
673 }
674
675 static const VMStateDescription vmstate_cpu_common = {
676 .name = "cpu_common",
677 .version_id = 1,
678 .minimum_version_id = 1,
679 .minimum_version_id_old = 1,
680 .post_load = cpu_common_post_load,
681 .fields = (VMStateField []) {
682 VMSTATE_UINT32(halted, CPUArchState),
683 VMSTATE_UINT32(interrupt_request, CPUArchState),
684 VMSTATE_END_OF_LIST()
685 }
686 };
687 #endif
688
689 CPUArchState *qemu_get_cpu(int cpu)
690 {
691 CPUArchState *env = first_cpu;
692
693 while (env) {
694 if (env->cpu_index == cpu)
695 break;
696 env = env->next_cpu;
697 }
698
699 return env;
700 }
701
702 void cpu_exec_init(CPUArchState *env)
703 {
704 CPUArchState **penv;
705 int cpu_index;
706
707 #if defined(CONFIG_USER_ONLY)
708 cpu_list_lock();
709 #endif
710 env->next_cpu = NULL;
711 penv = &first_cpu;
712 cpu_index = 0;
713 while (*penv != NULL) {
714 penv = &(*penv)->next_cpu;
715 cpu_index++;
716 }
717 env->cpu_index = cpu_index;
718 env->numa_node = 0;
719 QTAILQ_INIT(&env->breakpoints);
720 QTAILQ_INIT(&env->watchpoints);
721 #ifndef CONFIG_USER_ONLY
722 env->thread_id = qemu_get_thread_id();
723 #endif
724 *penv = env;
725 #if defined(CONFIG_USER_ONLY)
726 cpu_list_unlock();
727 #endif
728 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
729 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
730 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
731 cpu_save, cpu_load, env);
732 #endif
733 }
734
735 /* Allocate a new translation block. Flush the translation buffer if
736 too many translation blocks or too much generated code. */
737 static TranslationBlock *tb_alloc(target_ulong pc)
738 {
739 TranslationBlock *tb;
740
741 if (nb_tbs >= code_gen_max_blocks ||
742 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
743 return NULL;
744 tb = &tbs[nb_tbs++];
745 tb->pc = pc;
746 tb->cflags = 0;
747 return tb;
748 }
749
750 void tb_free(TranslationBlock *tb)
751 {
752 /* In practice this is mostly used for single-use temporary TBs.
753 Ignore the hard cases and just back up if this TB happens to
754 be the last one generated. */
755 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
756 code_gen_ptr = tb->tc_ptr;
757 nb_tbs--;
758 }
759 }
760
761 static inline void invalidate_page_bitmap(PageDesc *p)
762 {
763 if (p->code_bitmap) {
764 g_free(p->code_bitmap);
765 p->code_bitmap = NULL;
766 }
767 p->code_write_count = 0;
768 }
769
770 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
771
772 static void page_flush_tb_1 (int level, void **lp)
773 {
774 int i;
775
776 if (*lp == NULL) {
777 return;
778 }
779 if (level == 0) {
780 PageDesc *pd = *lp;
781 for (i = 0; i < L2_SIZE; ++i) {
782 pd[i].first_tb = NULL;
783 invalidate_page_bitmap(pd + i);
784 }
785 } else {
786 void **pp = *lp;
787 for (i = 0; i < L2_SIZE; ++i) {
788 page_flush_tb_1 (level - 1, pp + i);
789 }
790 }
791 }
792
793 static void page_flush_tb(void)
794 {
795 int i;
796 for (i = 0; i < V_L1_SIZE; i++) {
797 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
798 }
799 }
800
801 /* flush all the translation blocks */
802 /* XXX: tb_flush is currently not thread safe */
803 void tb_flush(CPUArchState *env1)
804 {
805 CPUArchState *env;
806 #if defined(DEBUG_FLUSH)
807 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
808 (unsigned long)(code_gen_ptr - code_gen_buffer),
809 nb_tbs, nb_tbs > 0 ?
810 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
811 #endif
812 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
813 cpu_abort(env1, "Internal error: code buffer overflow\n");
814
815 nb_tbs = 0;
816
817 for(env = first_cpu; env != NULL; env = env->next_cpu) {
818 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
819 }
820
821 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
822 page_flush_tb();
823
824 code_gen_ptr = code_gen_buffer;
825 /* XXX: flush processor icache at this point if cache flush is
826 expensive */
827 tb_flush_count++;
828 }
829
830 #ifdef DEBUG_TB_CHECK
831
832 static void tb_invalidate_check(target_ulong address)
833 {
834 TranslationBlock *tb;
835 int i;
836 address &= TARGET_PAGE_MASK;
837 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
838 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
839 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
840 address >= tb->pc + tb->size)) {
841 printf("ERROR invalidate: address=" TARGET_FMT_lx
842 " PC=%08lx size=%04x\n",
843 address, (long)tb->pc, tb->size);
844 }
845 }
846 }
847 }
848
849 /* verify that all the pages have correct rights for code */
850 static void tb_page_check(void)
851 {
852 TranslationBlock *tb;
853 int i, flags1, flags2;
854
855 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
856 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
857 flags1 = page_get_flags(tb->pc);
858 flags2 = page_get_flags(tb->pc + tb->size - 1);
859 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
860 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
861 (long)tb->pc, tb->size, flags1, flags2);
862 }
863 }
864 }
865 }
866
867 #endif
868
869 /* invalidate one TB */
870 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
871 int next_offset)
872 {
873 TranslationBlock *tb1;
874 for(;;) {
875 tb1 = *ptb;
876 if (tb1 == tb) {
877 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
878 break;
879 }
880 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
881 }
882 }
883
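/* The TB lists walked by the helpers below use tagged pointers: the low two
 * bits of each stored pointer record which slot of the referring TB the link
 * came from (0 or 1 for page_next[] and jmp_next[], 2 marking the jmp_first
 * list head), so every traversal masks with ~3 to recover the real
 * TranslationBlock pointer. */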
884 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
885 {
886 TranslationBlock *tb1;
887 unsigned int n1;
888
889 for(;;) {
890 tb1 = *ptb;
891 n1 = (uintptr_t)tb1 & 3;
892 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
893 if (tb1 == tb) {
894 *ptb = tb1->page_next[n1];
895 break;
896 }
897 ptb = &tb1->page_next[n1];
898 }
899 }
900
901 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
902 {
903 TranslationBlock *tb1, **ptb;
904 unsigned int n1;
905
906 ptb = &tb->jmp_next[n];
907 tb1 = *ptb;
908 if (tb1) {
909 /* find tb(n) in circular list */
910 for(;;) {
911 tb1 = *ptb;
912 n1 = (uintptr_t)tb1 & 3;
913 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
914 if (n1 == n && tb1 == tb)
915 break;
916 if (n1 == 2) {
917 ptb = &tb1->jmp_first;
918 } else {
919 ptb = &tb1->jmp_next[n1];
920 }
921 }
922 /* now we can suppress tb(n) from the list */
923 *ptb = tb->jmp_next[n];
924
925 tb->jmp_next[n] = NULL;
926 }
927 }
928
929 /* reset the jump entry 'n' of a TB so that it is not chained to
930 another TB */
931 static inline void tb_reset_jump(TranslationBlock *tb, int n)
932 {
933 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
934 }
935
936 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
937 {
938 CPUArchState *env;
939 PageDesc *p;
940 unsigned int h, n1;
941 tb_page_addr_t phys_pc;
942 TranslationBlock *tb1, *tb2;
943
944 /* remove the TB from the hash list */
945 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
946 h = tb_phys_hash_func(phys_pc);
947 tb_remove(&tb_phys_hash[h], tb,
948 offsetof(TranslationBlock, phys_hash_next));
949
950 /* remove the TB from the page list */
951 if (tb->page_addr[0] != page_addr) {
952 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
953 tb_page_remove(&p->first_tb, tb);
954 invalidate_page_bitmap(p);
955 }
956 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
957 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
958 tb_page_remove(&p->first_tb, tb);
959 invalidate_page_bitmap(p);
960 }
961
962 tb_invalidated_flag = 1;
963
964 /* remove the TB from each CPU's tb_jmp_cache */
965 h = tb_jmp_cache_hash_func(tb->pc);
966 for(env = first_cpu; env != NULL; env = env->next_cpu) {
967 if (env->tb_jmp_cache[h] == tb)
968 env->tb_jmp_cache[h] = NULL;
969 }
970
971 /* suppress this TB from the two jump lists */
972 tb_jmp_remove(tb, 0);
973 tb_jmp_remove(tb, 1);
974
975 /* suppress any remaining jumps to this TB */
976 tb1 = tb->jmp_first;
977 for(;;) {
978 n1 = (uintptr_t)tb1 & 3;
979 if (n1 == 2)
980 break;
981 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
982 tb2 = tb1->jmp_next[n1];
983 tb_reset_jump(tb1, n1);
984 tb1->jmp_next[n1] = NULL;
985 tb1 = tb2;
986 }
987 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
988
989 tb_phys_invalidate_count++;
990 }
991
992 static inline void set_bits(uint8_t *tab, int start, int len)
993 {
994 int end, mask, end1;
995
996 end = start + len;
997 tab += start >> 3;
998 mask = 0xff << (start & 7);
999 if ((start & ~7) == (end & ~7)) {
1000 if (start < end) {
1001 mask &= ~(0xff << (end & 7));
1002 *tab |= mask;
1003 }
1004 } else {
1005 *tab++ |= mask;
1006 start = (start + 8) & ~7;
1007 end1 = end & ~7;
1008 while (start < end1) {
1009 *tab++ = 0xff;
1010 start += 8;
1011 }
1012 if (start < end) {
1013 mask = ~(0xff << (end & 7));
1014 *tab |= mask;
1015 }
1016 }
1017 }
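/* Worked example: set_bits(tab, 3, 7) marks bits 3..9.  The leading partial
 * byte is or-ed with 0xff << 3 == 0xf8 (bits 3..7), no full bytes follow,
 * and the trailing partial byte is or-ed with (uint8_t)~(0xff << 2) == 0x03
 * (bits 8..9 of the bitmap). */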
1018
1019 static void build_page_bitmap(PageDesc *p)
1020 {
1021 int n, tb_start, tb_end;
1022 TranslationBlock *tb;
1023
1024 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1025
1026 tb = p->first_tb;
1027 while (tb != NULL) {
1028 n = (uintptr_t)tb & 3;
1029 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1030 /* NOTE: this is subtle as a TB may span two physical pages */
1031 if (n == 0) {
1032 /* NOTE: tb_end may be after the end of the page, but
1033 it is not a problem */
1034 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1035 tb_end = tb_start + tb->size;
1036 if (tb_end > TARGET_PAGE_SIZE)
1037 tb_end = TARGET_PAGE_SIZE;
1038 } else {
1039 tb_start = 0;
1040 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1041 }
1042 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1043 tb = tb->page_next[n];
1044 }
1045 }
1046
1047 TranslationBlock *tb_gen_code(CPUArchState *env,
1048 target_ulong pc, target_ulong cs_base,
1049 int flags, int cflags)
1050 {
1051 TranslationBlock *tb;
1052 uint8_t *tc_ptr;
1053 tb_page_addr_t phys_pc, phys_page2;
1054 target_ulong virt_page2;
1055 int code_gen_size;
1056
1057 phys_pc = get_page_addr_code(env, pc);
1058 tb = tb_alloc(pc);
1059 if (!tb) {
1060 /* flush must be done */
1061 tb_flush(env);
1062 /* cannot fail at this point */
1063 tb = tb_alloc(pc);
1064 /* Don't forget to invalidate previous TB info. */
1065 tb_invalidated_flag = 1;
1066 }
1067 tc_ptr = code_gen_ptr;
1068 tb->tc_ptr = tc_ptr;
1069 tb->cs_base = cs_base;
1070 tb->flags = flags;
1071 tb->cflags = cflags;
1072 cpu_gen_code(env, tb, &code_gen_size);
1073 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1074 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1075
1076 /* check next page if needed */
1077 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1078 phys_page2 = -1;
1079 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1080 phys_page2 = get_page_addr_code(env, virt_page2);
1081 }
1082 tb_link_page(tb, phys_pc, phys_page2);
1083 return tb;
1084 }
1085
1086 /*
1087 * Invalidate all TBs which intersect with the target physical address range
1088 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1089 * 'is_cpu_write_access' should be true if called from a real cpu write
1090 * access: the virtual CPU will exit the current TB if code is modified inside
1091 * this TB.
1092 */
1093 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1094 int is_cpu_write_access)
1095 {
1096 while (start < end) {
1097 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1098 start &= TARGET_PAGE_MASK;
1099 start += TARGET_PAGE_SIZE;
1100 }
1101 }
1102
1103 /*
1104 * Invalidate all TBs which intersect with the target physical address range
1105 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1106 * 'is_cpu_write_access' should be true if called from a real cpu write
1107 * access: the virtual CPU will exit the current TB if code is modified inside
1108 * this TB.
1109 */
1110 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1111 int is_cpu_write_access)
1112 {
1113 TranslationBlock *tb, *tb_next, *saved_tb;
1114 CPUArchState *env = cpu_single_env;
1115 tb_page_addr_t tb_start, tb_end;
1116 PageDesc *p;
1117 int n;
1118 #ifdef TARGET_HAS_PRECISE_SMC
1119 int current_tb_not_found = is_cpu_write_access;
1120 TranslationBlock *current_tb = NULL;
1121 int current_tb_modified = 0;
1122 target_ulong current_pc = 0;
1123 target_ulong current_cs_base = 0;
1124 int current_flags = 0;
1125 #endif /* TARGET_HAS_PRECISE_SMC */
1126
1127 p = page_find(start >> TARGET_PAGE_BITS);
1128 if (!p)
1129 return;
1130 if (!p->code_bitmap &&
1131 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1132 is_cpu_write_access) {
1133 /* build code bitmap */
1134 build_page_bitmap(p);
1135 }
1136
1137 /* we remove all the TBs in the range [start, end[ */
1138 /* XXX: see if in some cases it could be faster to invalidate all the code */
1139 tb = p->first_tb;
1140 while (tb != NULL) {
1141 n = (uintptr_t)tb & 3;
1142 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1143 tb_next = tb->page_next[n];
1144 /* NOTE: this is subtle as a TB may span two physical pages */
1145 if (n == 0) {
1146 /* NOTE: tb_end may be after the end of the page, but
1147 it is not a problem */
1148 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1149 tb_end = tb_start + tb->size;
1150 } else {
1151 tb_start = tb->page_addr[1];
1152 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1153 }
1154 if (!(tb_end <= start || tb_start >= end)) {
1155 #ifdef TARGET_HAS_PRECISE_SMC
1156 if (current_tb_not_found) {
1157 current_tb_not_found = 0;
1158 current_tb = NULL;
1159 if (env->mem_io_pc) {
1160 /* now we have a real cpu fault */
1161 current_tb = tb_find_pc(env->mem_io_pc);
1162 }
1163 }
1164 if (current_tb == tb &&
1165 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1166 /* If we are modifying the current TB, we must stop
1167 its execution. We could be more precise by checking
1168 that the modification is after the current PC, but it
1169 would require a specialized function to partially
1170 restore the CPU state */
1171
1172 current_tb_modified = 1;
1173 cpu_restore_state(current_tb, env, env->mem_io_pc);
1174 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1175 &current_flags);
1176 }
1177 #endif /* TARGET_HAS_PRECISE_SMC */
1178 /* we need to do that to handle the case where a signal
1179 occurs while doing tb_phys_invalidate() */
1180 saved_tb = NULL;
1181 if (env) {
1182 saved_tb = env->current_tb;
1183 env->current_tb = NULL;
1184 }
1185 tb_phys_invalidate(tb, -1);
1186 if (env) {
1187 env->current_tb = saved_tb;
1188 if (env->interrupt_request && env->current_tb)
1189 cpu_interrupt(env, env->interrupt_request);
1190 }
1191 }
1192 tb = tb_next;
1193 }
1194 #if !defined(CONFIG_USER_ONLY)
1195 /* if no code remaining, no need to continue to use slow writes */
1196 if (!p->first_tb) {
1197 invalidate_page_bitmap(p);
1198 if (is_cpu_write_access) {
1199 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1200 }
1201 }
1202 #endif
1203 #ifdef TARGET_HAS_PRECISE_SMC
1204 if (current_tb_modified) {
1205 /* we generate a block containing just the instruction
1206 modifying the memory. This ensures that it cannot modify
1207 itself. */
1208 env->current_tb = NULL;
1209 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1210 cpu_resume_from_signal(env, NULL);
1211 }
1212 #endif
1213 }
1214
1215 /* len must be <= 8 and start must be a multiple of len */
1216 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1217 {
1218 PageDesc *p;
1219 int offset, b;
1220 #if 0
1221 if (1) {
1222 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1223 cpu_single_env->mem_io_vaddr, len,
1224 cpu_single_env->eip,
1225 cpu_single_env->eip +
1226 (intptr_t)cpu_single_env->segs[R_CS].base);
1227 }
1228 #endif
1229 p = page_find(start >> TARGET_PAGE_BITS);
1230 if (!p)
1231 return;
1232 if (p->code_bitmap) {
1233 offset = start & ~TARGET_PAGE_MASK;
1234 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1235 if (b & ((1 << len) - 1))
1236 goto do_invalidate;
1237 } else {
1238 do_invalidate:
1239 tb_invalidate_phys_page_range(start, start + len, 1);
1240 }
1241 }
1242
1243 #if !defined(CONFIG_SOFTMMU)
1244 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1245 uintptr_t pc, void *puc)
1246 {
1247 TranslationBlock *tb;
1248 PageDesc *p;
1249 int n;
1250 #ifdef TARGET_HAS_PRECISE_SMC
1251 TranslationBlock *current_tb = NULL;
1252 CPUArchState *env = cpu_single_env;
1253 int current_tb_modified = 0;
1254 target_ulong current_pc = 0;
1255 target_ulong current_cs_base = 0;
1256 int current_flags = 0;
1257 #endif
1258
1259 addr &= TARGET_PAGE_MASK;
1260 p = page_find(addr >> TARGET_PAGE_BITS);
1261 if (!p)
1262 return;
1263 tb = p->first_tb;
1264 #ifdef TARGET_HAS_PRECISE_SMC
1265 if (tb && pc != 0) {
1266 current_tb = tb_find_pc(pc);
1267 }
1268 #endif
1269 while (tb != NULL) {
1270 n = (uintptr_t)tb & 3;
1271 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1272 #ifdef TARGET_HAS_PRECISE_SMC
1273 if (current_tb == tb &&
1274 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1275 /* If we are modifying the current TB, we must stop
1276 its execution. We could be more precise by checking
1277 that the modification is after the current PC, but it
1278 would require a specialized function to partially
1279 restore the CPU state */
1280
1281 current_tb_modified = 1;
1282 cpu_restore_state(current_tb, env, pc);
1283 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1284 &current_flags);
1285 }
1286 #endif /* TARGET_HAS_PRECISE_SMC */
1287 tb_phys_invalidate(tb, addr);
1288 tb = tb->page_next[n];
1289 }
1290 p->first_tb = NULL;
1291 #ifdef TARGET_HAS_PRECISE_SMC
1292 if (current_tb_modified) {
1293 /* we generate a block containing just the instruction
1294 modifying the memory. This ensures that it cannot modify
1295 itself. */
1296 env->current_tb = NULL;
1297 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1298 cpu_resume_from_signal(env, puc);
1299 }
1300 #endif
1301 }
1302 #endif
1303
1304 /* add the tb to the target page and protect it if necessary */
1305 static inline void tb_alloc_page(TranslationBlock *tb,
1306 unsigned int n, tb_page_addr_t page_addr)
1307 {
1308 PageDesc *p;
1309 #ifndef CONFIG_USER_ONLY
1310 bool page_already_protected;
1311 #endif
1312
1313 tb->page_addr[n] = page_addr;
1314 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1315 tb->page_next[n] = p->first_tb;
1316 #ifndef CONFIG_USER_ONLY
1317 page_already_protected = p->first_tb != NULL;
1318 #endif
1319 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1320 invalidate_page_bitmap(p);
1321
1322 #if defined(TARGET_HAS_SMC) || 1
1323
1324 #if defined(CONFIG_USER_ONLY)
1325 if (p->flags & PAGE_WRITE) {
1326 target_ulong addr;
1327 PageDesc *p2;
1328 int prot;
1329
1330 /* force the host page to be non-writable (writes will take a
1331 page fault + mprotect overhead) */
1332 page_addr &= qemu_host_page_mask;
1333 prot = 0;
1334 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1335 addr += TARGET_PAGE_SIZE) {
1336
1337 p2 = page_find (addr >> TARGET_PAGE_BITS);
1338 if (!p2)
1339 continue;
1340 prot |= p2->flags;
1341 p2->flags &= ~PAGE_WRITE;
1342 }
1343 mprotect(g2h(page_addr), qemu_host_page_size,
1344 (prot & PAGE_BITS) & ~PAGE_WRITE);
1345 #ifdef DEBUG_TB_INVALIDATE
1346 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1347 page_addr);
1348 #endif
1349 }
1350 #else
1351 /* if some code is already present, then the pages are already
1352 protected. So we handle the case where only the first TB is
1353 allocated in a physical page */
1354 if (!page_already_protected) {
1355 tlb_protect_code(page_addr);
1356 }
1357 #endif
1358
1359 #endif /* TARGET_HAS_SMC */
1360 }
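/* Either way the guest page ends up write-protected: user-mode emulation
 * mprotect()s the containing host page, while system emulation relies on
 * tlb_protect_code() so that guest stores to the page are trapped and reach
 * tb_invalidate_phys_page_fast() above. */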
1361
1362 /* add a new TB and link it to the physical page tables. phys_page2 is
1363 (-1) to indicate that only one page contains the TB. */
1364 void tb_link_page(TranslationBlock *tb,
1365 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1366 {
1367 unsigned int h;
1368 TranslationBlock **ptb;
1369
1370 /* Grab the mmap lock to stop another thread invalidating this TB
1371 before we are done. */
1372 mmap_lock();
1373 /* add in the physical hash table */
1374 h = tb_phys_hash_func(phys_pc);
1375 ptb = &tb_phys_hash[h];
1376 tb->phys_hash_next = *ptb;
1377 *ptb = tb;
1378
1379 /* add in the page list */
1380 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1381 if (phys_page2 != -1)
1382 tb_alloc_page(tb, 1, phys_page2);
1383 else
1384 tb->page_addr[1] = -1;
1385
1386 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1387 tb->jmp_next[0] = NULL;
1388 tb->jmp_next[1] = NULL;
1389
1390 /* init original jump addresses */
1391 if (tb->tb_next_offset[0] != 0xffff)
1392 tb_reset_jump(tb, 0);
1393 if (tb->tb_next_offset[1] != 0xffff)
1394 tb_reset_jump(tb, 1);
1395
1396 #ifdef DEBUG_TB_CHECK
1397 tb_page_check();
1398 #endif
1399 mmap_unlock();
1400 }
1401
1402 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1403 tb[1].tc_ptr. Return NULL if not found */
1404 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1405 {
1406 int m_min, m_max, m;
1407 uintptr_t v;
1408 TranslationBlock *tb;
1409
1410 if (nb_tbs <= 0)
1411 return NULL;
1412 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1413 tc_ptr >= (uintptr_t)code_gen_ptr) {
1414 return NULL;
1415 }
1416 /* binary search (cf Knuth) */
1417 m_min = 0;
1418 m_max = nb_tbs - 1;
1419 while (m_min <= m_max) {
1420 m = (m_min + m_max) >> 1;
1421 tb = &tbs[m];
1422 v = (uintptr_t)tb->tc_ptr;
1423 if (v == tc_ptr)
1424 return tb;
1425 else if (tc_ptr < v) {
1426 m_max = m - 1;
1427 } else {
1428 m_min = m + 1;
1429 }
1430 }
1431 return &tbs[m_max];
1432 }
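/* When no TB starts exactly at tc_ptr, the binary search falls out with
 * m_max indexing the last TB whose tc_ptr is below the requested host
 * address, i.e. the TB whose generated code should contain tc_ptr (the
 * range check at the top already rejected pointers outside the buffer). */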
1433
1434 static void tb_reset_jump_recursive(TranslationBlock *tb);
1435
1436 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1437 {
1438 TranslationBlock *tb1, *tb_next, **ptb;
1439 unsigned int n1;
1440
1441 tb1 = tb->jmp_next[n];
1442 if (tb1 != NULL) {
1443 /* find head of list */
1444 for(;;) {
1445 n1 = (uintptr_t)tb1 & 3;
1446 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1447 if (n1 == 2)
1448 break;
1449 tb1 = tb1->jmp_next[n1];
1450 }
1451 /* we are now sure that tb jumps to tb1 */
1452 tb_next = tb1;
1453
1454 /* remove tb from the jmp_first list */
1455 ptb = &tb_next->jmp_first;
1456 for(;;) {
1457 tb1 = *ptb;
1458 n1 = (uintptr_t)tb1 & 3;
1459 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1460 if (n1 == n && tb1 == tb)
1461 break;
1462 ptb = &tb1->jmp_next[n1];
1463 }
1464 *ptb = tb->jmp_next[n];
1465 tb->jmp_next[n] = NULL;
1466
1467 /* suppress the jump to the next tb in generated code */
1468 tb_reset_jump(tb, n);
1469
1470 /* suppress jumps in the tb we could have jumped to */
1471 tb_reset_jump_recursive(tb_next);
1472 }
1473 }
1474
1475 static void tb_reset_jump_recursive(TranslationBlock *tb)
1476 {
1477 tb_reset_jump_recursive2(tb, 0);
1478 tb_reset_jump_recursive2(tb, 1);
1479 }
1480
1481 #if defined(TARGET_HAS_ICE)
1482 #if defined(CONFIG_USER_ONLY)
1483 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1484 {
1485 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1486 }
1487 #else
1488 void tb_invalidate_phys_addr(target_phys_addr_t addr)
1489 {
1490 ram_addr_t ram_addr;
1491 MemoryRegionSection *section;
1492
1493 section = phys_page_find(addr >> TARGET_PAGE_BITS);
1494 if (!(memory_region_is_ram(section->mr)
1495 || (section->mr->rom_device && section->mr->readable))) {
1496 return;
1497 }
1498 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1499 + memory_region_section_addr(section, addr);
1500 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1501 }
1502
1503 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1504 {
1505 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1506 (pc & ~TARGET_PAGE_MASK));
1507 }
1508 #endif
1509 #endif /* TARGET_HAS_ICE */
1510
1511 #if defined(CONFIG_USER_ONLY)
1512 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1513
1514 {
1515 }
1516
1517 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1518 int flags, CPUWatchpoint **watchpoint)
1519 {
1520 return -ENOSYS;
1521 }
1522 #else
1523 /* Add a watchpoint. */
1524 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1525 int flags, CPUWatchpoint **watchpoint)
1526 {
1527 target_ulong len_mask = ~(len - 1);
1528 CPUWatchpoint *wp;
1529
1530 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1531 if ((len & (len - 1)) || (addr & ~len_mask) ||
1532 len == 0 || len > TARGET_PAGE_SIZE) {
1533 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1534 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1535 return -EINVAL;
1536 }
1537 wp = g_malloc(sizeof(*wp));
1538
1539 wp->vaddr = addr;
1540 wp->len_mask = len_mask;
1541 wp->flags = flags;
1542
1543 /* keep all GDB-injected watchpoints in front */
1544 if (flags & BP_GDB)
1545 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1546 else
1547 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1548
1549 tlb_flush_page(env, addr);
1550
1551 if (watchpoint)
1552 *watchpoint = wp;
1553 return 0;
1554 }
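/* Example: a request with len == 4 yields len_mask == ~3, so addr must be
 * 4-byte aligned; len must also be a non-zero power of two no larger than
 * TARGET_PAGE_SIZE, otherwise the insert is rejected with -EINVAL. */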
1555
1556 /* Remove a specific watchpoint. */
1557 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1558 int flags)
1559 {
1560 target_ulong len_mask = ~(len - 1);
1561 CPUWatchpoint *wp;
1562
1563 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1564 if (addr == wp->vaddr && len_mask == wp->len_mask
1565 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1566 cpu_watchpoint_remove_by_ref(env, wp);
1567 return 0;
1568 }
1569 }
1570 return -ENOENT;
1571 }
1572
1573 /* Remove a specific watchpoint by reference. */
1574 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1575 {
1576 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1577
1578 tlb_flush_page(env, watchpoint->vaddr);
1579
1580 g_free(watchpoint);
1581 }
1582
1583 /* Remove all matching watchpoints. */
1584 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1585 {
1586 CPUWatchpoint *wp, *next;
1587
1588 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1589 if (wp->flags & mask)
1590 cpu_watchpoint_remove_by_ref(env, wp);
1591 }
1592 }
1593 #endif
1594
1595 /* Add a breakpoint. */
1596 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1597 CPUBreakpoint **breakpoint)
1598 {
1599 #if defined(TARGET_HAS_ICE)
1600 CPUBreakpoint *bp;
1601
1602 bp = g_malloc(sizeof(*bp));
1603
1604 bp->pc = pc;
1605 bp->flags = flags;
1606
1607 /* keep all GDB-injected breakpoints in front */
1608 if (flags & BP_GDB)
1609 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1610 else
1611 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1612
1613 breakpoint_invalidate(env, pc);
1614
1615 if (breakpoint)
1616 *breakpoint = bp;
1617 return 0;
1618 #else
1619 return -ENOSYS;
1620 #endif
1621 }
1622
1623 /* Remove a specific breakpoint. */
1624 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1625 {
1626 #if defined(TARGET_HAS_ICE)
1627 CPUBreakpoint *bp;
1628
1629 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1630 if (bp->pc == pc && bp->flags == flags) {
1631 cpu_breakpoint_remove_by_ref(env, bp);
1632 return 0;
1633 }
1634 }
1635 return -ENOENT;
1636 #else
1637 return -ENOSYS;
1638 #endif
1639 }
1640
1641 /* Remove a specific breakpoint by reference. */
1642 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1643 {
1644 #if defined(TARGET_HAS_ICE)
1645 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1646
1647 breakpoint_invalidate(env, breakpoint->pc);
1648
1649 g_free(breakpoint);
1650 #endif
1651 }
1652
1653 /* Remove all matching breakpoints. */
1654 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1655 {
1656 #if defined(TARGET_HAS_ICE)
1657 CPUBreakpoint *bp, *next;
1658
1659 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1660 if (bp->flags & mask)
1661 cpu_breakpoint_remove_by_ref(env, bp);
1662 }
1663 #endif
1664 }
1665
1666 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1667 CPU loop after each instruction */
1668 void cpu_single_step(CPUArchState *env, int enabled)
1669 {
1670 #if defined(TARGET_HAS_ICE)
1671 if (env->singlestep_enabled != enabled) {
1672 env->singlestep_enabled = enabled;
1673 if (kvm_enabled())
1674 kvm_update_guest_debug(env, 0);
1675 else {
1676 /* must flush all the translated code to avoid inconsistencies */
1677 /* XXX: only flush what is necessary */
1678 tb_flush(env);
1679 }
1680 }
1681 #endif
1682 }
1683
1684 static void cpu_unlink_tb(CPUArchState *env)
1685 {
1686 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1687 problem and hope the cpu will stop of its own accord. For userspace
1688 emulation this often isn't actually as bad as it sounds. Often
1689 signals are used primarily to interrupt blocking syscalls. */
1690 TranslationBlock *tb;
1691 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1692
1693 spin_lock(&interrupt_lock);
1694 tb = env->current_tb;
1695 /* if the cpu is currently executing code, we must unlink it and
1696 all the potentially executing TBs */
1697 if (tb) {
1698 env->current_tb = NULL;
1699 tb_reset_jump_recursive(tb);
1700 }
1701 spin_unlock(&interrupt_lock);
1702 }
1703
1704 #ifndef CONFIG_USER_ONLY
1705 /* mask must never be zero, except for A20 change call */
1706 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1707 {
1708 int old_mask;
1709
1710 old_mask = env->interrupt_request;
1711 env->interrupt_request |= mask;
1712
1713 /*
1714 * If called from iothread context, wake the target cpu in
1715 * case it's halted.
1716 */
1717 if (!qemu_cpu_is_self(env)) {
1718 qemu_cpu_kick(env);
1719 return;
1720 }
1721
1722 if (use_icount) {
1723 env->icount_decr.u16.high = 0xffff;
1724 if (!can_do_io(env)
1725 && (mask & ~old_mask) != 0) {
1726 cpu_abort(env, "Raised interrupt while not in I/O function");
1727 }
1728 } else {
1729 cpu_unlink_tb(env);
1730 }
1731 }
1732
1733 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1734
1735 #else /* CONFIG_USER_ONLY */
1736
1737 void cpu_interrupt(CPUArchState *env, int mask)
1738 {
1739 env->interrupt_request |= mask;
1740 cpu_unlink_tb(env);
1741 }
1742 #endif /* CONFIG_USER_ONLY */
1743
1744 void cpu_reset_interrupt(CPUArchState *env, int mask)
1745 {
1746 env->interrupt_request &= ~mask;
1747 }
1748
1749 void cpu_exit(CPUArchState *env)
1750 {
1751 env->exit_request = 1;
1752 cpu_unlink_tb(env);
1753 }
1754
1755 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1756 {
1757 va_list ap;
1758 va_list ap2;
1759
1760 va_start(ap, fmt);
1761 va_copy(ap2, ap);
1762 fprintf(stderr, "qemu: fatal: ");
1763 vfprintf(stderr, fmt, ap);
1764 fprintf(stderr, "\n");
1765 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1766 if (qemu_log_enabled()) {
1767 qemu_log("qemu: fatal: ");
1768 qemu_log_vprintf(fmt, ap2);
1769 qemu_log("\n");
1770 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1771 qemu_log_flush();
1772 qemu_log_close();
1773 }
1774 va_end(ap2);
1775 va_end(ap);
1776 #if defined(CONFIG_USER_ONLY)
1777 {
1778 struct sigaction act;
1779 sigfillset(&act.sa_mask);
1780 act.sa_handler = SIG_DFL;
1781 sigaction(SIGABRT, &act, NULL);
1782 }
1783 #endif
1784 abort();
1785 }
1786
1787 CPUArchState *cpu_copy(CPUArchState *env)
1788 {
1789 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1790 CPUArchState *next_cpu = new_env->next_cpu;
1791 int cpu_index = new_env->cpu_index;
1792 #if defined(TARGET_HAS_ICE)
1793 CPUBreakpoint *bp;
1794 CPUWatchpoint *wp;
1795 #endif
1796
1797 memcpy(new_env, env, sizeof(CPUArchState));
1798
1799 /* Preserve chaining and index. */
1800 new_env->next_cpu = next_cpu;
1801 new_env->cpu_index = cpu_index;
1802
1803 /* Clone all break/watchpoints.
1804 Note: Once we support ptrace with hw-debug register access, make sure
1805 BP_CPU break/watchpoints are handled correctly on clone. */
1806 QTAILQ_INIT(&env->breakpoints);
1807 QTAILQ_INIT(&env->watchpoints);
1808 #if defined(TARGET_HAS_ICE)
1809 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1810 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1811 }
1812 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1813 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1814 wp->flags, NULL);
1815 }
1816 #endif
1817
1818 return new_env;
1819 }
1820
1821 #if !defined(CONFIG_USER_ONLY)
1822 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1823 {
1824 unsigned int i;
1825
1826 /* Discard jump cache entries for any tb which might potentially
1827 overlap the flushed page. */
1828 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1829 memset (&env->tb_jmp_cache[i], 0,
1830 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1831
1832 i = tb_jmp_cache_hash_page(addr);
1833 memset (&env->tb_jmp_cache[i], 0,
1834 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1835 }
1836
1837 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1838 uintptr_t length)
1839 {
1840 uintptr_t start1;
1841
1842 /* we modify the TLB cache so that the dirty bit will be set again
1843 when accessing the range */
1844 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1845 /* Check that we don't span multiple blocks - this breaks the
1846 address comparisons below. */
1847 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1848 != (end - 1) - start) {
1849 abort();
1850 }
1851 cpu_tlb_reset_dirty_all(start1, length);
1852
1853 }
1854
1855 /* Note: start and end must be within the same ram block. */
1856 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1857 int dirty_flags)
1858 {
1859 uintptr_t length;
1860
1861 start &= TARGET_PAGE_MASK;
1862 end = TARGET_PAGE_ALIGN(end);
1863
1864 length = end - start;
1865 if (length == 0)
1866 return;
1867 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1868
1869 if (tcg_enabled()) {
1870 tlb_reset_dirty_range_all(start, end, length);
1871 }
1872 }
1873
1874 int cpu_physical_memory_set_dirty_tracking(int enable)
1875 {
1876 int ret = 0;
1877 in_migration = enable;
1878 return ret;
1879 }
1880
1881 target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
1882 MemoryRegionSection *section,
1883 target_ulong vaddr,
1884 target_phys_addr_t paddr,
1885 int prot,
1886 target_ulong *address)
1887 {
1888 target_phys_addr_t iotlb;
1889 CPUWatchpoint *wp;
1890
1891 if (memory_region_is_ram(section->mr)) {
1892 /* Normal RAM. */
1893 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1894 + memory_region_section_addr(section, paddr);
1895 if (!section->readonly) {
1896 iotlb |= phys_section_notdirty;
1897 } else {
1898 iotlb |= phys_section_rom;
1899 }
1900 } else {
1901 /* IO handlers are currently passed a physical address.
1902 It would be nice to pass an offset from the base address
1903 of that region. This would avoid having to special case RAM,
1904 and avoid full address decoding in every device.
1905 We can't use the high bits of pd for this because
1906 IO_MEM_ROMD uses these as a ram address. */
1907 iotlb = section - phys_sections;
1908 iotlb += memory_region_section_addr(section, paddr);
1909 }
1910
1911 /* Make accesses to pages with watchpoints go via the
1912 watchpoint trap routines. */
1913 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1914 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1915 /* Avoid trapping reads of pages with a write breakpoint. */
1916 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1917 iotlb = phys_section_watch + paddr;
1918 *address |= TLB_MMIO;
1919 break;
1920 }
1921 }
1922 }
1923
1924 return iotlb;
1925 }
1926
1927 #else
1928 /*
1929 * Walks guest process memory "regions" one by one
1930 * and calls callback function 'fn' for each region.
1931 */
1932
1933 struct walk_memory_regions_data
1934 {
1935 walk_memory_regions_fn fn;
1936 void *priv;
1937 uintptr_t start;
1938 int prot;
1939 };
1940
1941 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1942 abi_ulong end, int new_prot)
1943 {
1944 if (data->start != -1ul) {
1945 int rc = data->fn(data->priv, data->start, end, data->prot);
1946 if (rc != 0) {
1947 return rc;
1948 }
1949 }
1950
1951 data->start = (new_prot ? end : -1ul);
1952 data->prot = new_prot;
1953
1954 return 0;
1955 }
1956
1957 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1958 abi_ulong base, int level, void **lp)
1959 {
1960 abi_ulong pa;
1961 int i, rc;
1962
1963 if (*lp == NULL) {
1964 return walk_memory_regions_end(data, base, 0);
1965 }
1966
1967 if (level == 0) {
1968 PageDesc *pd = *lp;
1969 for (i = 0; i < L2_SIZE; ++i) {
1970 int prot = pd[i].flags;
1971
1972 pa = base | (i << TARGET_PAGE_BITS);
1973 if (prot != data->prot) {
1974 rc = walk_memory_regions_end(data, pa, prot);
1975 if (rc != 0) {
1976 return rc;
1977 }
1978 }
1979 }
1980 } else {
1981 void **pp = *lp;
1982 for (i = 0; i < L2_SIZE; ++i) {
1983 pa = base | ((abi_ulong)i <<
1984 (TARGET_PAGE_BITS + L2_BITS * level));
1985 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1986 if (rc != 0) {
1987 return rc;
1988 }
1989 }
1990 }
1991
1992 return 0;
1993 }
1994
1995 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1996 {
1997 struct walk_memory_regions_data data;
1998 uintptr_t i;
1999
2000 data.fn = fn;
2001 data.priv = priv;
2002 data.start = -1ul;
2003 data.prot = 0;
2004
2005 for (i = 0; i < V_L1_SIZE; i++) {
2006 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2007 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2008 if (rc != 0) {
2009 return rc;
2010 }
2011 }
2012
2013 return walk_memory_regions_end(&data, 0, 0);
2014 }
2015
2016 static int dump_region(void *priv, abi_ulong start,
2017 abi_ulong end, unsigned long prot)
2018 {
2019 FILE *f = (FILE *)priv;
2020
2021 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2022 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2023 start, end, end - start,
2024 ((prot & PAGE_READ) ? 'r' : '-'),
2025 ((prot & PAGE_WRITE) ? 'w' : '-'),
2026 ((prot & PAGE_EXEC) ? 'x' : '-'));
2027
2028 return (0);
2029 }
2030
2031 /* dump memory mappings */
2032 void page_dump(FILE *f)
2033 {
2034 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2035 "start", "end", "size", "prot");
2036 walk_memory_regions(f, dump_region);
2037 }
2038
2039 int page_get_flags(target_ulong address)
2040 {
2041 PageDesc *p;
2042
2043 p = page_find(address >> TARGET_PAGE_BITS);
2044 if (!p)
2045 return 0;
2046 return p->flags;
2047 }
2048
2049 /* Modify the flags of a page and invalidate the code if necessary.
2050 The flag PAGE_WRITE_ORG is positioned automatically depending
2051 on PAGE_WRITE. The mmap_lock should already be held. */
2052 void page_set_flags(target_ulong start, target_ulong end, int flags)
2053 {
2054 target_ulong addr, len;
2055
2056 /* This function should never be called with addresses outside the
2057 guest address space. If this assert fires, it probably indicates
2058 a missing call to h2g_valid. */
2059 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2060 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2061 #endif
2062 assert(start < end);
2063
2064 start = start & TARGET_PAGE_MASK;
2065 end = TARGET_PAGE_ALIGN(end);
2066
2067 if (flags & PAGE_WRITE) {
2068 flags |= PAGE_WRITE_ORG;
2069 }
2070
2071 for (addr = start, len = end - start;
2072 len != 0;
2073 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2074 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2075
2076 /* If the page was write-protected but is being made writable,
2077 invalidate any translated code it contains. */
2078 if (!(p->flags & PAGE_WRITE) &&
2079 (flags & PAGE_WRITE) &&
2080 p->first_tb) {
2081 tb_invalidate_phys_page(addr, 0, NULL);
2082 }
2083 p->flags = flags;
2084 }
2085 }
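/* Illustrative sketch (not part of the original file): a hypothetical caller
   registering a freshly created guest mapping. Real callers live in the
   user-mode mmap emulation; this only shows the locking rule and the fact
   that PAGE_WRITE_ORG is derived automatically from PAGE_WRITE. */
static void example_register_guest_mapping(target_ulong start, target_ulong len)
{
    mmap_lock();
    page_set_flags(start, start + len,
                   PAGE_VALID | PAGE_READ | PAGE_WRITE | PAGE_EXEC);
    mmap_unlock();
}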
2086
2087 int page_check_range(target_ulong start, target_ulong len, int flags)
2088 {
2089 PageDesc *p;
2090 target_ulong end;
2091 target_ulong addr;
2092
2093 /* This function should never be called with addresses outside the
2094 guest address space. If this assert fires, it probably indicates
2095 a missing call to h2g_valid. */
2096 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2097 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2098 #endif
2099
2100 if (len == 0) {
2101 return 0;
2102 }
2103 if (start + len - 1 < start) {
2104 /* We've wrapped around. */
2105 return -1;
2106 }
2107
2108 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2109 start = start & TARGET_PAGE_MASK;
2110
2111 for (addr = start, len = end - start;
2112 len != 0;
2113 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2114 p = page_find(addr >> TARGET_PAGE_BITS);
2115 if (!p)
2116 return -1;
2117 if (!(p->flags & PAGE_VALID))
2118 return -1;
2119
2120 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2121 return -1;
2122 if (flags & PAGE_WRITE) {
2123 if (!(p->flags & PAGE_WRITE_ORG))
2124 return -1;
2125 /* unprotect the page if it was put read-only because it
2126 contains translated code */
2127 if (!(p->flags & PAGE_WRITE)) {
2128 if (!page_unprotect(addr, 0, NULL))
2129 return -1;
2130 }
2131 return 0;
2132 }
2133 }
2134 return 0;
2135 }
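/* Illustrative sketch (not part of the original file): a hypothetical helper
   that copies data out of guest memory only after page_check_range() has
   confirmed every page in the range is valid and readable. g2h() is the
   usual user-mode guest-to-host address translation macro. */
static int example_copy_from_guest(void *dest, target_ulong guest_addr,
                                   target_ulong len)
{
    if (page_check_range(guest_addr, len, PAGE_READ) < 0) {
        return -1;              /* some page is missing or not readable */
    }
    memcpy(dest, g2h(guest_addr), len);
    return 0;
}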
2136
2137 /* called from signal handler: invalidate the code and unprotect the
2138 page. Return TRUE if the fault was successfully handled. */
2139 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2140 {
2141 unsigned int prot;
2142 PageDesc *p;
2143 target_ulong host_start, host_end, addr;
2144
2145 /* Technically this isn't safe inside a signal handler. However, we
2146 know this only ever happens in a synchronous SEGV handler, so in
2147 practice it seems to be ok. */
2148 mmap_lock();
2149
2150 p = page_find(address >> TARGET_PAGE_BITS);
2151 if (!p) {
2152 mmap_unlock();
2153 return 0;
2154 }
2155
2156 /* if the page was really writable, then we change its
2157 protection back to writable */
2158 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2159 host_start = address & qemu_host_page_mask;
2160 host_end = host_start + qemu_host_page_size;
2161
2162 prot = 0;
2163 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2164 p = page_find(addr >> TARGET_PAGE_BITS);
2165 p->flags |= PAGE_WRITE;
2166 prot |= p->flags;
2167
2168 /* and since the content will be modified, we must invalidate
2169 the corresponding translated code. */
2170 tb_invalidate_phys_page(addr, pc, puc);
2171 #ifdef DEBUG_TB_CHECK
2172 tb_invalidate_check(addr);
2173 #endif
2174 }
2175 mprotect((void *)g2h(host_start), qemu_host_page_size,
2176 prot & PAGE_BITS);
2177
2178 mmap_unlock();
2179 return 1;
2180 }
2181 mmap_unlock();
2182 return 0;
2183 }
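/* Illustrative sketch (not part of the original file): the shape of a host
   SIGSEGV handler that lets page_unprotect() service write faults on pages
   QEMU write-protected to guard translated code. The real handler lives in
   the user-mode signal code; the function and parameter names here are
   hypothetical, and passing the fault address through h2g() is an assumption
   about how the host address maps back to a guest address. */
static int example_handle_write_fault(uintptr_t host_pc, void *fault_addr)
{
    if (page_unprotect(h2g(fault_addr), host_pc, NULL)) {
        return 1;   /* handled: the faulting store can simply be restarted */
    }
    return 0;       /* not ours: deliver the fault to the guest instead */
}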
2184 #endif /* defined(CONFIG_USER_ONLY) */
2185
2186 #if !defined(CONFIG_USER_ONLY)
2187
2188 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2189 typedef struct subpage_t {
2190 MemoryRegion iomem;
2191 target_phys_addr_t base;
2192 uint16_t sub_section[TARGET_PAGE_SIZE];
2193 } subpage_t;
2194
2195 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2196 uint16_t section);
2197 static subpage_t *subpage_init(target_phys_addr_t base);
2198 static void destroy_page_desc(uint16_t section_index)
2199 {
2200 MemoryRegionSection *section = &phys_sections[section_index];
2201 MemoryRegion *mr = section->mr;
2202
2203 if (mr->subpage) {
2204 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2205 memory_region_destroy(&subpage->iomem);
2206 g_free(subpage);
2207 }
2208 }
2209
2210 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2211 {
2212 unsigned i;
2213 PhysPageEntry *p;
2214
2215 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2216 return;
2217 }
2218
2219 p = phys_map_nodes[lp->ptr];
2220 for (i = 0; i < L2_SIZE; ++i) {
2221 if (!p[i].is_leaf) {
2222 destroy_l2_mapping(&p[i], level - 1);
2223 } else {
2224 destroy_page_desc(p[i].ptr);
2225 }
2226 }
2227 lp->is_leaf = 0;
2228 lp->ptr = PHYS_MAP_NODE_NIL;
2229 }
2230
2231 static void destroy_all_mappings(void)
2232 {
2233 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2234 phys_map_nodes_reset();
2235 }
2236
2237 static uint16_t phys_section_add(MemoryRegionSection *section)
2238 {
2239 if (phys_sections_nb == phys_sections_nb_alloc) {
2240 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2241 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2242 phys_sections_nb_alloc);
2243 }
2244 phys_sections[phys_sections_nb] = *section;
2245 return phys_sections_nb++;
2246 }
2247
2248 static void phys_sections_clear(void)
2249 {
2250 phys_sections_nb = 0;
2251 }
2252
2253 static void register_subpage(MemoryRegionSection *section)
2254 {
2255 subpage_t *subpage;
2256 target_phys_addr_t base = section->offset_within_address_space
2257 & TARGET_PAGE_MASK;
2258 MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2259 MemoryRegionSection subsection = {
2260 .offset_within_address_space = base,
2261 .size = TARGET_PAGE_SIZE,
2262 };
2263 target_phys_addr_t start, end;
2264
2265 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2266
2267 if (!(existing->mr->subpage)) {
2268 subpage = subpage_init(base);
2269 subsection.mr = &subpage->iomem;
2270 phys_page_set(base >> TARGET_PAGE_BITS, 1,
2271 phys_section_add(&subsection));
2272 } else {
2273 subpage = container_of(existing->mr, subpage_t, iomem);
2274 }
2275 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2276 end = start + section->size - 1;
2277 subpage_register(subpage, start, end, phys_section_add(section));
2278 }
2279
2280
2281 static void register_multipage(MemoryRegionSection *section)
2282 {
2283 target_phys_addr_t start_addr = section->offset_within_address_space;
2284 ram_addr_t size = section->size;
2285 target_phys_addr_t addr;
2286 uint16_t section_index = phys_section_add(section);
2287
2288 assert(size);
2289
2290 addr = start_addr;
2291 phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2292 section_index);
2293 }
2294
2295 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2296 bool readonly)
2297 {
2298 MemoryRegionSection now = *section, remain = *section;
2299
2300 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2301 || (now.size < TARGET_PAGE_SIZE)) {
2302 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2303 - now.offset_within_address_space,
2304 now.size);
2305 register_subpage(&now);
2306 remain.size -= now.size;
2307 remain.offset_within_address_space += now.size;
2308 remain.offset_within_region += now.size;
2309 }
2310 while (remain.size >= TARGET_PAGE_SIZE) {
2311 now = remain;
2312 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2313 now.size = TARGET_PAGE_SIZE;
2314 register_subpage(&now);
2315 } else {
2316 now.size &= TARGET_PAGE_MASK;
2317 register_multipage(&now);
2318 }
2319 remain.size -= now.size;
2320 remain.offset_within_address_space += now.size;
2321 remain.offset_within_region += now.size;
2322 }
2323 now = remain;
2324 if (now.size) {
2325 register_subpage(&now);
2326 }
2327 }
2328
2329
2330 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2331 {
2332 if (kvm_enabled())
2333 kvm_coalesce_mmio_region(addr, size);
2334 }
2335
2336 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2337 {
2338 if (kvm_enabled())
2339 kvm_uncoalesce_mmio_region(addr, size);
2340 }
2341
2342 void qemu_flush_coalesced_mmio_buffer(void)
2343 {
2344 if (kvm_enabled())
2345 kvm_flush_coalesced_mmio_buffer();
2346 }
2347
2348 #if defined(__linux__) && !defined(TARGET_S390X)
2349
2350 #include <sys/vfs.h>
2351
2352 #define HUGETLBFS_MAGIC 0x958458f6
2353
2354 static long gethugepagesize(const char *path)
2355 {
2356 struct statfs fs;
2357 int ret;
2358
2359 do {
2360 ret = statfs(path, &fs);
2361 } while (ret != 0 && errno == EINTR);
2362
2363 if (ret != 0) {
2364 perror(path);
2365 return 0;
2366 }
2367
2368 if (fs.f_type != HUGETLBFS_MAGIC)
2369 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2370
2371 return fs.f_bsize;
2372 }
2373
2374 static void *file_ram_alloc(RAMBlock *block,
2375 ram_addr_t memory,
2376 const char *path)
2377 {
2378 char *filename;
2379 void *area;
2380 int fd;
2381 #ifdef MAP_POPULATE
2382 int flags;
2383 #endif
2384 unsigned long hpagesize;
2385
2386 hpagesize = gethugepagesize(path);
2387 if (!hpagesize) {
2388 return NULL;
2389 }
2390
2391 if (memory < hpagesize) {
2392 return NULL;
2393 }
2394
2395 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2396 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2397 return NULL;
2398 }
2399
2400 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2401 return NULL;
2402 }
2403
2404 fd = mkstemp(filename);
2405 if (fd < 0) {
2406 perror("unable to create backing store for hugepages");
2407 free(filename);
2408 return NULL;
2409 }
2410 unlink(filename);
2411 free(filename);
2412
2413 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2414
2415 /*
2416 * ftruncate is not supported by hugetlbfs in older
2417 * hosts, so don't bother bailing out on errors.
2418 * If anything goes wrong with it under other filesystems,
2419 * mmap will fail.
2420 */
2421 if (ftruncate(fd, memory))
2422 perror("ftruncate");
2423
2424 #ifdef MAP_POPULATE
2425 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2426 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2427 * to sidestep this quirk.
2428 */
2429 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2430 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2431 #else
2432 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2433 #endif
2434 if (area == MAP_FAILED) {
2435 perror("file_ram_alloc: can't mmap RAM pages");
2436 close(fd);
2437 return (NULL);
2438 }
2439 block->fd = fd;
2440 return area;
2441 }
2442 #endif
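/* Illustrative sketch (not part of the original file): the rounding used by
   file_ram_alloc() above, written out with concrete numbers. Rounding a
   request up to a multiple of the huge page size is
   (memory + hpagesize - 1) & ~(hpagesize - 1); for example a 129 MiB
   request with 2 MiB huge pages becomes 130 MiB. */
static ram_addr_t example_round_to_hugepage(ram_addr_t memory,
                                            unsigned long hpagesize)
{
    return (memory + hpagesize - 1) & ~(ram_addr_t)(hpagesize - 1);
}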
2443
2444 static ram_addr_t find_ram_offset(ram_addr_t size)
2445 {
2446 RAMBlock *block, *next_block;
2447 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2448
2449 if (QLIST_EMPTY(&ram_list.blocks))
2450 return 0;
2451
2452 QLIST_FOREACH(block, &ram_list.blocks, next) {
2453 ram_addr_t end, next = RAM_ADDR_MAX;
2454
2455 end = block->offset + block->length;
2456
2457 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2458 if (next_block->offset >= end) {
2459 next = MIN(next, next_block->offset);
2460 }
2461 }
2462 if (next - end >= size && next - end < mingap) {
2463 offset = end;
2464 mingap = next - end;
2465 }
2466 }
2467
2468 if (offset == RAM_ADDR_MAX) {
2469 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2470 (uint64_t)size);
2471 abort();
2472 }
2473
2474 return offset;
2475 }
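/* Worked example (not part of the original file): with existing blocks
   covering [0, 0x1000000) and [0x3000000, 0x4000000), a request for
   0x800000 bytes returns offset 0x1000000 -- the end of the block whose
   following gap (0x2000000 bytes here) is the smallest one that still
   fits the request. */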
2476
2477 static ram_addr_t last_ram_offset(void)
2478 {
2479 RAMBlock *block;
2480 ram_addr_t last = 0;
2481
2482 QLIST_FOREACH(block, &ram_list.blocks, next)
2483 last = MAX(last, block->offset + block->length);
2484
2485 return last;
2486 }
2487
2488 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2489 {
2490 int ret;
2491 QemuOpts *machine_opts;
2492
2493 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
2494 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2495 if (machine_opts &&
2496 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2497 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2498 if (ret) {
2499 perror("qemu_madvise");
2500 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2501 "but dump-guest-core=off was specified\n");
2502 }
2503 }
2504 }
2505
2506 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2507 {
2508 RAMBlock *new_block, *block;
2509
2510 new_block = NULL;
2511 QLIST_FOREACH(block, &ram_list.blocks, next) {
2512 if (block->offset == addr) {
2513 new_block = block;
2514 break;
2515 }
2516 }
2517 assert(new_block);
2518 assert(!new_block->idstr[0]);
2519
2520 if (dev) {
2521 char *id = qdev_get_dev_path(dev);
2522 if (id) {
2523 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2524 g_free(id);
2525 }
2526 }
2527 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2528
2529 QLIST_FOREACH(block, &ram_list.blocks, next) {
2530 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2531 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2532 new_block->idstr);
2533 abort();
2534 }
2535 }
2536 }
2537
2538 static int memory_try_enable_merging(void *addr, size_t len)
2539 {
2540 QemuOpts *opts;
2541
2542 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2543 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2544 /* disabled by the user */
2545 return 0;
2546 }
2547
2548 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2549 }
2550
2551 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2552 MemoryRegion *mr)
2553 {
2554 RAMBlock *new_block;
2555
2556 size = TARGET_PAGE_ALIGN(size);
2557 new_block = g_malloc0(sizeof(*new_block));
2558
2559 new_block->mr = mr;
2560 new_block->offset = find_ram_offset(size);
2561 if (host) {
2562 new_block->host = host;
2563 new_block->flags |= RAM_PREALLOC_MASK;
2564 } else {
2565 if (mem_path) {
2566 #if defined (__linux__) && !defined(TARGET_S390X)
2567 new_block->host = file_ram_alloc(new_block, size, mem_path);
2568 if (!new_block->host) {
2569 new_block->host = qemu_vmalloc(size);
2570 memory_try_enable_merging(new_block->host, size);
2571 }
2572 #else
2573 fprintf(stderr, "-mem-path option unsupported\n");
2574 exit(1);
2575 #endif
2576 } else {
2577 if (xen_enabled()) {
2578 xen_ram_alloc(new_block->offset, size, mr);
2579 } else if (kvm_enabled()) {
2580 /* some s390/kvm configurations have special constraints */
2581 new_block->host = kvm_vmalloc(size);
2582 } else {
2583 new_block->host = qemu_vmalloc(size);
2584 }
2585 memory_try_enable_merging(new_block->host, size);
2586 }
2587 }
2588 new_block->length = size;
2589
2590 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2591
2592 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2593 last_ram_offset() >> TARGET_PAGE_BITS);
2594 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2595 0, size >> TARGET_PAGE_BITS);
2596 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2597
2598 qemu_ram_setup_dump(new_block->host, size);
2599
2600 if (kvm_enabled())
2601 kvm_setup_guest_memory(new_block->host, size);
2602
2603 return new_block->offset;
2604 }
2605
2606 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2607 {
2608 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2609 }
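/* Illustrative sketch (not part of the original file): how a board or device
   model obtains guest RAM through the allocator above. In practice callers
   go through memory_region_init_ram(), which wraps qemu_ram_alloc(); the
   8 MiB size is made up for the example. */
static ram_addr_t example_alloc_vram(MemoryRegion *mr)
{
    /* The returned value indexes ram_list and the dirty bitmap; it is a
       ram_addr_t, not a guest physical address. */
    return qemu_ram_alloc(8 * 1024 * 1024, mr);
}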
2610
2611 void qemu_ram_free_from_ptr(ram_addr_t addr)
2612 {
2613 RAMBlock *block;
2614
2615 QLIST_FOREACH(block, &ram_list.blocks, next) {
2616 if (addr == block->offset) {
2617 QLIST_REMOVE(block, next);
2618 g_free(block);
2619 return;
2620 }
2621 }
2622 }
2623
2624 void qemu_ram_free(ram_addr_t addr)
2625 {
2626 RAMBlock *block;
2627
2628 QLIST_FOREACH(block, &ram_list.blocks, next) {
2629 if (addr == block->offset) {
2630 QLIST_REMOVE(block, next);
2631 if (block->flags & RAM_PREALLOC_MASK) {
2632 ;
2633 } else if (mem_path) {
2634 #if defined (__linux__) && !defined(TARGET_S390X)
2635 if (block->fd) {
2636 munmap(block->host, block->length);
2637 close(block->fd);
2638 } else {
2639 qemu_vfree(block->host);
2640 }
2641 #else
2642 abort();
2643 #endif
2644 } else {
2645 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2646 munmap(block->host, block->length);
2647 #else
2648 if (xen_enabled()) {
2649 xen_invalidate_map_cache_entry(block->host);
2650 } else {
2651 qemu_vfree(block->host);
2652 }
2653 #endif
2654 }
2655 g_free(block);
2656 return;
2657 }
2658 }
2659
2660 }
2661
2662 #ifndef _WIN32
2663 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2664 {
2665 RAMBlock *block;
2666 ram_addr_t offset;
2667 int flags;
2668 void *area, *vaddr;
2669
2670 QLIST_FOREACH(block, &ram_list.blocks, next) {
2671 offset = addr - block->offset;
2672 if (offset < block->length) {
2673 vaddr = block->host + offset;
2674 if (block->flags & RAM_PREALLOC_MASK) {
2675 ;
2676 } else {
2677 flags = MAP_FIXED;
2678 munmap(vaddr, length);
2679 if (mem_path) {
2680 #if defined(__linux__) && !defined(TARGET_S390X)
2681 if (block->fd) {
2682 #ifdef MAP_POPULATE
2683 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2684 MAP_PRIVATE;
2685 #else
2686 flags |= MAP_PRIVATE;
2687 #endif
2688 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2689 flags, block->fd, offset);
2690 } else {
2691 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2692 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2693 flags, -1, 0);
2694 }
2695 #else
2696 abort();
2697 #endif
2698 } else {
2699 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2700 flags |= MAP_SHARED | MAP_ANONYMOUS;
2701 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2702 flags, -1, 0);
2703 #else
2704 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2705 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2706 flags, -1, 0);
2707 #endif
2708 }
2709 if (area != vaddr) {
2710 fprintf(stderr, "Could not remap addr: "
2711 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2712 length, addr);
2713 exit(1);
2714 }
2715 memory_try_enable_merging(vaddr, length);
2716 qemu_ram_setup_dump(vaddr, length);
2717 }
2718 return;
2719 }
2720 }
2721 }
2722 #endif /* !_WIN32 */
2723
2724 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2725 With the exception of the softmmu code in this file, this should
2726 only be used for local memory (e.g. video ram) that the device owns,
2727 and knows it isn't going to access beyond the end of the block.
2728
2729 It should not be used for general purpose DMA.
2730 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2731 */
2732 void *qemu_get_ram_ptr(ram_addr_t addr)
2733 {
2734 RAMBlock *block;
2735
2736 QLIST_FOREACH(block, &ram_list.blocks, next) {
2737 if (addr - block->offset < block->length) {
2738 /* Move this entry to the start of the list. */
2739 if (block != QLIST_FIRST(&ram_list.blocks)) {
2740 QLIST_REMOVE(block, next);
2741 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2742 }
2743 if (xen_enabled()) {
2744 /* We need to check whether the requested address is in RAM,
2745 * because we don't want to map the entire guest memory in QEMU.
2746 * In that case, just map up to the end of the requested page.
2747 */
2748 if (block->offset == 0) {
2749 return xen_map_cache(addr, 0, 0);
2750 } else if (block->host == NULL) {
2751 block->host =
2752 xen_map_cache(block->offset, block->length, 1);
2753 }
2754 }
2755 return block->host + (addr - block->offset);
2756 }
2757 }
2758
2759 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2760 abort();
2761
2762 return NULL;
2763 }
2764
2765 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2766 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2767 */
2768 void *qemu_safe_ram_ptr(ram_addr_t addr)
2769 {
2770 RAMBlock *block;
2771
2772 QLIST_FOREACH(block, &ram_list.blocks, next) {
2773 if (addr - block->offset < block->length) {
2774 if (xen_enabled()) {
2775 /* We need to check whether the requested address is in RAM,
2776 * because we don't want to map the entire guest memory in QEMU.
2777 * In that case, just map up to the end of the requested page.
2778 */
2779 if (block->offset == 0) {
2780 return xen_map_cache(addr, 0, 0);
2781 } else if (block->host == NULL) {
2782 block->host =
2783 xen_map_cache(block->offset, block->length, 1);
2784 }
2785 }
2786 return block->host + (addr - block->offset);
2787 }
2788 }
2789
2790 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2791 abort();
2792
2793 return NULL;
2794 }
2795
2796 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2797 * but takes a size argument */
2798 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2799 {
2800 if (*size == 0) {
2801 return NULL;
2802 }
2803 if (xen_enabled()) {
2804 return xen_map_cache(addr, *size, 1);
2805 } else {
2806 RAMBlock *block;
2807
2808 QLIST_FOREACH(block, &ram_list.blocks, next) {
2809 if (addr - block->offset < block->length) {
2810 if (addr - block->offset + *size > block->length)
2811 *size = block->length - addr + block->offset;
2812 return block->host + (addr - block->offset);
2813 }
2814 }
2815
2816 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2817 abort();
2818 }
2819 }
2820
2821 void qemu_put_ram_ptr(void *addr)
2822 {
2823 trace_qemu_put_ram_ptr(addr);
2824 }
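/* Illustrative sketch (not part of the original file): a device that owns a
   RAM block, such as a hypothetical framebuffer, may touch it directly via
   qemu_get_ram_ptr() under the contract documented above (local access only,
   never general-purpose DMA). */
static void example_clear_framebuffer(ram_addr_t fb_offset, ram_addr_t fb_size)
{
    uint8_t *fb = qemu_get_ram_ptr(fb_offset);

    memset(fb, 0, fb_size);     /* stays within the device's own block */
    qemu_put_ram_ptr(fb);       /* currently just a tracepoint, see above */
}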
2825
2826 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2827 {
2828 RAMBlock *block;
2829 uint8_t *host = ptr;
2830
2831 if (xen_enabled()) {
2832 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2833 return 0;
2834 }
2835
2836 QLIST_FOREACH(block, &ram_list.blocks, next) {
2837 /* This case appears when the block is not mapped. */
2838 if (block->host == NULL) {
2839 continue;
2840 }
2841 if (host - block->host < block->length) {
2842 *ram_addr = block->offset + (host - block->host);
2843 return 0;
2844 }
2845 }
2846
2847 return -1;
2848 }
2849
2850 /* Some of the softmmu routines need to translate from a host pointer
2851 (typically a TLB entry) back to a ram offset. */
2852 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2853 {
2854 ram_addr_t ram_addr;
2855
2856 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2857 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2858 abort();
2859 }
2860 return ram_addr;
2861 }
2862
2863 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
2864 unsigned size)
2865 {
2866 #ifdef DEBUG_UNASSIGNED
2867 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2868 #endif
2869 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2870 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2871 #endif
2872 return 0;
2873 }
2874
2875 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
2876 uint64_t val, unsigned size)
2877 {
2878 #ifdef DEBUG_UNASSIGNED
2879 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2880 #endif
2881 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2882 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2883 #endif
2884 }
2885
2886 static const MemoryRegionOps unassigned_mem_ops = {
2887 .read = unassigned_mem_read,
2888 .write = unassigned_mem_write,
2889 .endianness = DEVICE_NATIVE_ENDIAN,
2890 };
2891
2892 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
2893 unsigned size)
2894 {
2895 abort();
2896 }
2897
2898 static void error_mem_write(void *opaque, target_phys_addr_t addr,
2899 uint64_t value, unsigned size)
2900 {
2901 abort();
2902 }
2903
2904 static const MemoryRegionOps error_mem_ops = {
2905 .read = error_mem_read,
2906 .write = error_mem_write,
2907 .endianness = DEVICE_NATIVE_ENDIAN,
2908 };
2909
2910 static const MemoryRegionOps rom_mem_ops = {
2911 .read = error_mem_read,
2912 .write = unassigned_mem_write,
2913 .endianness = DEVICE_NATIVE_ENDIAN,
2914 };
2915
2916 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
2917 uint64_t val, unsigned size)
2918 {
2919 int dirty_flags;
2920 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2921 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2922 #if !defined(CONFIG_USER_ONLY)
2923 tb_invalidate_phys_page_fast(ram_addr, size);
2924 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2925 #endif
2926 }
2927 switch (size) {
2928 case 1:
2929 stb_p(qemu_get_ram_ptr(ram_addr), val);
2930 break;
2931 case 2:
2932 stw_p(qemu_get_ram_ptr(ram_addr), val);
2933 break;
2934 case 4:
2935 stl_p(qemu_get_ram_ptr(ram_addr), val);
2936 break;
2937 default:
2938 abort();
2939 }
2940 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2941 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2942 /* we remove the notdirty callback only if the code has been
2943 flushed */
2944 if (dirty_flags == 0xff)
2945 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2946 }
2947
2948 static const MemoryRegionOps notdirty_mem_ops = {
2949 .read = error_mem_read,
2950 .write = notdirty_mem_write,
2951 .endianness = DEVICE_NATIVE_ENDIAN,
2952 };
2953
2954 /* Generate a debug exception if a watchpoint has been hit. */
2955 static void check_watchpoint(int offset, int len_mask, int flags)
2956 {
2957 CPUArchState *env = cpu_single_env;
2958 target_ulong pc, cs_base;
2959 TranslationBlock *tb;
2960 target_ulong vaddr;
2961 CPUWatchpoint *wp;
2962 int cpu_flags;
2963
2964 if (env->watchpoint_hit) {
2965 /* We re-entered the check after replacing the TB. Now raise
2966 * the debug interrupt so that it will trigger after the
2967 * current instruction. */
2968 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2969 return;
2970 }
2971 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2972 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2973 if ((vaddr == (wp->vaddr & len_mask) ||
2974 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2975 wp->flags |= BP_WATCHPOINT_HIT;
2976 if (!env->watchpoint_hit) {
2977 env->watchpoint_hit = wp;
2978 tb = tb_find_pc(env->mem_io_pc);
2979 if (!tb) {
2980 cpu_abort(env, "check_watchpoint: could not find TB for "
2981 "pc=%p", (void *)env->mem_io_pc);
2982 }
2983 cpu_restore_state(tb, env, env->mem_io_pc);
2984 tb_phys_invalidate(tb, -1);
2985 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2986 env->exception_index = EXCP_DEBUG;
2987 cpu_loop_exit(env);
2988 } else {
2989 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2990 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2991 cpu_resume_from_signal(env, NULL);
2992 }
2993 }
2994 } else {
2995 wp->flags &= ~BP_WATCHPOINT_HIT;
2996 }
2997 }
2998 }
2999
3000 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3001 so these check for a hit then pass through to the normal out-of-line
3002 phys routines. */
3003 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
3004 unsigned size)
3005 {
3006 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3007 switch (size) {
3008 case 1: return ldub_phys(addr);
3009 case 2: return lduw_phys(addr);
3010 case 4: return ldl_phys(addr);
3011 default: abort();
3012 }
3013 }
3014
3015 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
3016 uint64_t val, unsigned size)
3017 {
3018 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3019 switch (size) {
3020 case 1:
3021 stb_phys(addr, val);
3022 break;
3023 case 2:
3024 stw_phys(addr, val);
3025 break;
3026 case 4:
3027 stl_phys(addr, val);
3028 break;
3029 default: abort();
3030 }
3031 }
3032
3033 static const MemoryRegionOps watch_mem_ops = {
3034 .read = watch_mem_read,
3035 .write = watch_mem_write,
3036 .endianness = DEVICE_NATIVE_ENDIAN,
3037 };
3038
3039 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3040 unsigned len)
3041 {
3042 subpage_t *mmio = opaque;
3043 unsigned int idx = SUBPAGE_IDX(addr);
3044 MemoryRegionSection *section;
3045 #if defined(DEBUG_SUBPAGE)
3046 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3047 mmio, len, addr, idx);
3048 #endif
3049
3050 section = &phys_sections[mmio->sub_section[idx]];
3051 addr += mmio->base;
3052 addr -= section->offset_within_address_space;
3053 addr += section->offset_within_region;
3054 return io_mem_read(section->mr, addr, len);
3055 }
3056
3057 static void subpage_write(void *opaque, target_phys_addr_t addr,
3058 uint64_t value, unsigned len)
3059 {
3060 subpage_t *mmio = opaque;
3061 unsigned int idx = SUBPAGE_IDX(addr);
3062 MemoryRegionSection *section;
3063 #if defined(DEBUG_SUBPAGE)
3064 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3065 " idx %d value %"PRIx64"\n",
3066 __func__, mmio, len, addr, idx, value);
3067 #endif
3068
3069 section = &phys_sections[mmio->sub_section[idx]];
3070 addr += mmio->base;
3071 addr -= section->offset_within_address_space;
3072 addr += section->offset_within_region;
3073 io_mem_write(section->mr, addr, value, len);
3074 }
3075
3076 static const MemoryRegionOps subpage_ops = {
3077 .read = subpage_read,
3078 .write = subpage_write,
3079 .endianness = DEVICE_NATIVE_ENDIAN,
3080 };
3081
3082 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3083 unsigned size)
3084 {
3085 ram_addr_t raddr = addr;
3086 void *ptr = qemu_get_ram_ptr(raddr);
3087 switch (size) {
3088 case 1: return ldub_p(ptr);
3089 case 2: return lduw_p(ptr);
3090 case 4: return ldl_p(ptr);
3091 default: abort();
3092 }
3093 }
3094
3095 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3096 uint64_t value, unsigned size)
3097 {
3098 ram_addr_t raddr = addr;
3099 void *ptr = qemu_get_ram_ptr(raddr);
3100 switch (size) {
3101 case 1: return stb_p(ptr, value);
3102 case 2: return stw_p(ptr, value);
3103 case 4: return stl_p(ptr, value);
3104 default: abort();
3105 }
3106 }
3107
3108 static const MemoryRegionOps subpage_ram_ops = {
3109 .read = subpage_ram_read,
3110 .write = subpage_ram_write,
3111 .endianness = DEVICE_NATIVE_ENDIAN,
3112 };
3113
3114 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3115 uint16_t section)
3116 {
3117 int idx, eidx;
3118
3119 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3120 return -1;
3121 idx = SUBPAGE_IDX(start);
3122 eidx = SUBPAGE_IDX(end);
3123 #if defined(DEBUG_SUBPAGE)
3124 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
3125 __func__, mmio, start, end, idx, eidx, section);
3126 #endif
3127 if (memory_region_is_ram(phys_sections[section].mr)) {
3128 MemoryRegionSection new_section = phys_sections[section];
3129 new_section.mr = &io_mem_subpage_ram;
3130 section = phys_section_add(&new_section);
3131 }
3132 for (; idx <= eidx; idx++) {
3133 mmio->sub_section[idx] = section;
3134 }
3135
3136 return 0;
3137 }
3138
3139 static subpage_t *subpage_init(target_phys_addr_t base)
3140 {
3141 subpage_t *mmio;
3142
3143 mmio = g_malloc0(sizeof(subpage_t));
3144
3145 mmio->base = base;
3146 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3147 "subpage", TARGET_PAGE_SIZE);
3148 mmio->iomem.subpage = true;
3149 #if defined(DEBUG_SUBPAGE)
3150 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
3151 mmio, base, TARGET_PAGE_SIZE);
3152 #endif
3153 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3154
3155 return mmio;
3156 }
3157
3158 static uint16_t dummy_section(MemoryRegion *mr)
3159 {
3160 MemoryRegionSection section = {
3161 .mr = mr,
3162 .offset_within_address_space = 0,
3163 .offset_within_region = 0,
3164 .size = UINT64_MAX,
3165 };
3166
3167 return phys_section_add(&section);
3168 }
3169
3170 MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3171 {
3172 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3173 }
3174
3175 static void io_mem_init(void)
3176 {
3177 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3178 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3179 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3180 "unassigned", UINT64_MAX);
3181 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3182 "notdirty", UINT64_MAX);
3183 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3184 "subpage-ram", UINT64_MAX);
3185 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3186 "watch", UINT64_MAX);
3187 }
3188
3189 static void core_begin(MemoryListener *listener)
3190 {
3191 destroy_all_mappings();
3192 phys_sections_clear();
3193 phys_map.ptr = PHYS_MAP_NODE_NIL;
3194 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3195 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3196 phys_section_rom = dummy_section(&io_mem_rom);
3197 phys_section_watch = dummy_section(&io_mem_watch);
3198 }
3199
3200 static void core_commit(MemoryListener *listener)
3201 {
3202 CPUArchState *env;
3203
3204 /* since each CPU stores ram addresses in its TLB cache, we must
3205 reset the modified entries */
3206 /* XXX: slow ! */
3207 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3208 tlb_flush(env, 1);
3209 }
3210 }
3211
3212 static void core_region_add(MemoryListener *listener,
3213 MemoryRegionSection *section)
3214 {
3215 cpu_register_physical_memory_log(section, section->readonly);
3216 }
3217
3218 static void core_region_del(MemoryListener *listener,
3219 MemoryRegionSection *section)
3220 {
3221 }
3222
3223 static void core_region_nop(MemoryListener *listener,
3224 MemoryRegionSection *section)
3225 {
3226 cpu_register_physical_memory_log(section, section->readonly);
3227 }
3228
3229 static void core_log_start(MemoryListener *listener,
3230 MemoryRegionSection *section)
3231 {
3232 }
3233
3234 static void core_log_stop(MemoryListener *listener,
3235 MemoryRegionSection *section)
3236 {
3237 }
3238
3239 static void core_log_sync(MemoryListener *listener,
3240 MemoryRegionSection *section)
3241 {
3242 }
3243
3244 static void core_log_global_start(MemoryListener *listener)
3245 {
3246 cpu_physical_memory_set_dirty_tracking(1);
3247 }
3248
3249 static void core_log_global_stop(MemoryListener *listener)
3250 {
3251 cpu_physical_memory_set_dirty_tracking(0);
3252 }
3253
3254 static void core_eventfd_add(MemoryListener *listener,
3255 MemoryRegionSection *section,
3256 bool match_data, uint64_t data, EventNotifier *e)
3257 {
3258 }
3259
3260 static void core_eventfd_del(MemoryListener *listener,
3261 MemoryRegionSection *section,
3262 bool match_data, uint64_t data, EventNotifier *e)
3263 {
3264 }
3265
3266 static void io_begin(MemoryListener *listener)
3267 {
3268 }
3269
3270 static void io_commit(MemoryListener *listener)
3271 {
3272 }
3273
3274 static void io_region_add(MemoryListener *listener,
3275 MemoryRegionSection *section)
3276 {
3277 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3278
3279 mrio->mr = section->mr;
3280 mrio->offset = section->offset_within_region;
3281 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3282 section->offset_within_address_space, section->size);
3283 ioport_register(&mrio->iorange);
3284 }
3285
3286 static void io_region_del(MemoryListener *listener,
3287 MemoryRegionSection *section)
3288 {
3289 isa_unassign_ioport(section->offset_within_address_space, section->size);
3290 }
3291
3292 static void io_region_nop(MemoryListener *listener,
3293 MemoryRegionSection *section)
3294 {
3295 }
3296
3297 static void io_log_start(MemoryListener *listener,
3298 MemoryRegionSection *section)
3299 {
3300 }
3301
3302 static void io_log_stop(MemoryListener *listener,
3303 MemoryRegionSection *section)
3304 {
3305 }
3306
3307 static void io_log_sync(MemoryListener *listener,
3308 MemoryRegionSection *section)
3309 {
3310 }
3311
3312 static void io_log_global_start(MemoryListener *listener)
3313 {
3314 }
3315
3316 static void io_log_global_stop(MemoryListener *listener)
3317 {
3318 }
3319
3320 static void io_eventfd_add(MemoryListener *listener,
3321 MemoryRegionSection *section,
3322 bool match_data, uint64_t data, EventNotifier *e)
3323 {
3324 }
3325
3326 static void io_eventfd_del(MemoryListener *listener,
3327 MemoryRegionSection *section,
3328 bool match_data, uint64_t data, EventNotifier *e)
3329 {
3330 }
3331
3332 static MemoryListener core_memory_listener = {
3333 .begin = core_begin,
3334 .commit = core_commit,
3335 .region_add = core_region_add,
3336 .region_del = core_region_del,
3337 .region_nop = core_region_nop,
3338 .log_start = core_log_start,
3339 .log_stop = core_log_stop,
3340 .log_sync = core_log_sync,
3341 .log_global_start = core_log_global_start,
3342 .log_global_stop = core_log_global_stop,
3343 .eventfd_add = core_eventfd_add,
3344 .eventfd_del = core_eventfd_del,
3345 .priority = 0,
3346 };
3347
3348 static MemoryListener io_memory_listener = {
3349 .begin = io_begin,
3350 .commit = io_commit,
3351 .region_add = io_region_add,
3352 .region_del = io_region_del,
3353 .region_nop = io_region_nop,
3354 .log_start = io_log_start,
3355 .log_stop = io_log_stop,
3356 .log_sync = io_log_sync,
3357 .log_global_start = io_log_global_start,
3358 .log_global_stop = io_log_global_stop,
3359 .eventfd_add = io_eventfd_add,
3360 .eventfd_del = io_eventfd_del,
3361 .priority = 0,
3362 };
3363
3364 static void memory_map_init(void)
3365 {
3366 system_memory = g_malloc(sizeof(*system_memory));
3367 memory_region_init(system_memory, "system", INT64_MAX);
3368 set_system_memory_map(system_memory);
3369
3370 system_io = g_malloc(sizeof(*system_io));
3371 memory_region_init(system_io, "io", 65536);
3372 set_system_io_map(system_io);
3373
3374 memory_listener_register(&core_memory_listener, system_memory);
3375 memory_listener_register(&io_memory_listener, system_io);
3376 }
3377
3378 MemoryRegion *get_system_memory(void)
3379 {
3380 return system_memory;
3381 }
3382
3383 MemoryRegion *get_system_io(void)
3384 {
3385 return system_io;
3386 }
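/* Illustrative sketch (not part of the original file): board code typically
   places a device's MemoryRegion into the flat address space built above.
   memory_region_add_subregion() and the 0xfe000000 base address are
   assumptions about the wider memory API, not definitions from this file. */
static void example_map_device_mmio(MemoryRegion *dev_mmio)
{
    memory_region_add_subregion(get_system_memory(), 0xfe000000, dev_mmio);
}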
3387
3388 #endif /* !defined(CONFIG_USER_ONLY) */
3389
3390 /* physical memory access (slow version, mainly for debug) */
3391 #if defined(CONFIG_USER_ONLY)
3392 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3393 uint8_t *buf, int len, int is_write)
3394 {
3395 int l, flags;
3396 target_ulong page;
3397 void * p;
3398
3399 while (len > 0) {
3400 page = addr & TARGET_PAGE_MASK;
3401 l = (page + TARGET_PAGE_SIZE) - addr;
3402 if (l > len)
3403 l = len;
3404 flags = page_get_flags(page);
3405 if (!(flags & PAGE_VALID))
3406 return -1;
3407 if (is_write) {
3408 if (!(flags & PAGE_WRITE))
3409 return -1;
3410 /* XXX: this code should not depend on lock_user */
3411 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3412 return -1;
3413 memcpy(p, buf, l);
3414 unlock_user(p, addr, l);
3415 } else {
3416 if (!(flags & PAGE_READ))
3417 return -1;
3418 /* XXX: this code should not depend on lock_user */
3419 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3420 return -1;
3421 memcpy(buf, p, l);
3422 unlock_user(p, addr, 0);
3423 }
3424 len -= l;
3425 buf += l;
3426 addr += l;
3427 }
3428 return 0;
3429 }
3430
3431 #else
3432
3433 static void invalidate_and_set_dirty(target_phys_addr_t addr,
3434 target_phys_addr_t length)
3435 {
3436 if (!cpu_physical_memory_is_dirty(addr)) {
3437 /* invalidate code */
3438 tb_invalidate_phys_page_range(addr, addr + length, 0);
3439 /* set dirty bit */
3440 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3441 }
3442 xen_modified_memory(addr, length);
3443 }
3444
3445 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3446 int len, int is_write)
3447 {
3448 int l;
3449 uint8_t *ptr;
3450 uint32_t val;
3451 target_phys_addr_t page;
3452 MemoryRegionSection *section;
3453
3454 while (len > 0) {
3455 page = addr & TARGET_PAGE_MASK;
3456 l = (page + TARGET_PAGE_SIZE) - addr;
3457 if (l > len)
3458 l = len;
3459 section = phys_page_find(page >> TARGET_PAGE_BITS);
3460
3461 if (is_write) {
3462 if (!memory_region_is_ram(section->mr)) {
3463 target_phys_addr_t addr1;
3464 addr1 = memory_region_section_addr(section, addr);
3465 /* XXX: could force cpu_single_env to NULL to avoid
3466 potential bugs */
3467 if (l >= 4 && ((addr1 & 3) == 0)) {
3468 /* 32 bit write access */
3469 val = ldl_p(buf);
3470 io_mem_write(section->mr, addr1, val, 4);
3471 l = 4;
3472 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3473 /* 16 bit write access */
3474 val = lduw_p(buf);
3475 io_mem_write(section->mr, addr1, val, 2);
3476 l = 2;
3477 } else {
3478 /* 8 bit write access */
3479 val = ldub_p(buf);
3480 io_mem_write(section->mr, addr1, val, 1);
3481 l = 1;
3482 }
3483 } else if (!section->readonly) {
3484 ram_addr_t addr1;
3485 addr1 = memory_region_get_ram_addr(section->mr)
3486 + memory_region_section_addr(section, addr);
3487 /* RAM case */
3488 ptr = qemu_get_ram_ptr(addr1);
3489 memcpy(ptr, buf, l);
3490 invalidate_and_set_dirty(addr1, l);
3491 qemu_put_ram_ptr(ptr);
3492 }
3493 } else {
3494 if (!(memory_region_is_ram(section->mr) ||
3495 memory_region_is_romd(section->mr))) {
3496 target_phys_addr_t addr1;
3497 /* I/O case */
3498 addr1 = memory_region_section_addr(section, addr);
3499 if (l >= 4 && ((addr1 & 3) == 0)) {
3500 /* 32 bit read access */
3501 val = io_mem_read(section->mr, addr1, 4);
3502 stl_p(buf, val);
3503 l = 4;
3504 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3505 /* 16 bit read access */
3506 val = io_mem_read(section->mr, addr1, 2);
3507 stw_p(buf, val);
3508 l = 2;
3509 } else {
3510 /* 8 bit read access */
3511 val = io_mem_read(section->mr, addr1, 1);
3512 stb_p(buf, val);
3513 l = 1;
3514 }
3515 } else {
3516 /* RAM case */
3517 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3518 + memory_region_section_addr(section,
3519 addr));
3520 memcpy(buf, ptr, l);
3521 qemu_put_ram_ptr(ptr);
3522 }
3523 }
3524 len -= l;
3525 buf += l;
3526 addr += l;
3527 }
3528 }
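/* Illustrative sketch (not part of the original file): DMA-style access to
   guest physical memory through the slow path above; the destination address
   is made up. The cpu_physical_memory_read()/write() wrappers used elsewhere
   in this file boil down to the same call. */
static void example_dma_write(const uint8_t *data, int len)
{
    target_phys_addr_t dest = 0x100000;     /* hypothetical guest address */

    cpu_physical_memory_rw(dest, (uint8_t *)data, len, 1 /* is_write */);
}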
3529
3530 /* used for ROM loading : can write in RAM and ROM */
3531 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3532 const uint8_t *buf, int len)
3533 {
3534 int l;
3535 uint8_t *ptr;
3536 target_phys_addr_t page;
3537 MemoryRegionSection *section;
3538
3539 while (len > 0) {
3540 page = addr & TARGET_PAGE_MASK;
3541 l = (page + TARGET_PAGE_SIZE) - addr;
3542 if (l > len)
3543 l = len;
3544 section = phys_page_find(page >> TARGET_PAGE_BITS);
3545
3546 if (!(memory_region_is_ram(section->mr) ||
3547 memory_region_is_romd(section->mr))) {
3548 /* do nothing */
3549 } else {
3550 unsigned long addr1;
3551 addr1 = memory_region_get_ram_addr(section->mr)
3552 + memory_region_section_addr(section, addr);
3553 /* ROM/RAM case */
3554 ptr = qemu_get_ram_ptr(addr1);
3555 memcpy(ptr, buf, l);
3556 invalidate_and_set_dirty(addr1, l);
3557 qemu_put_ram_ptr(ptr);
3558 }
3559 len -= l;
3560 buf += l;
3561 addr += l;
3562 }
3563 }
3564
3565 typedef struct {
3566 void *buffer;
3567 target_phys_addr_t addr;
3568 target_phys_addr_t len;
3569 } BounceBuffer;
3570
3571 static BounceBuffer bounce;
3572
3573 typedef struct MapClient {
3574 void *opaque;
3575 void (*callback)(void *opaque);
3576 QLIST_ENTRY(MapClient) link;
3577 } MapClient;
3578
3579 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3580 = QLIST_HEAD_INITIALIZER(map_client_list);
3581
3582 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3583 {
3584 MapClient *client = g_malloc(sizeof(*client));
3585
3586 client->opaque = opaque;
3587 client->callback = callback;
3588 QLIST_INSERT_HEAD(&map_client_list, client, link);
3589 return client;
3590 }
3591
3592 void cpu_unregister_map_client(void *_client)
3593 {
3594 MapClient *client = (MapClient *)_client;
3595
3596 QLIST_REMOVE(client, link);
3597 g_free(client);
3598 }
3599
3600 static void cpu_notify_map_clients(void)
3601 {
3602 MapClient *client;
3603
3604 while (!QLIST_EMPTY(&map_client_list)) {
3605 client = QLIST_FIRST(&map_client_list);
3606 client->callback(client->opaque);
3607 cpu_unregister_map_client(client);
3608 }
3609 }
3610
3611 /* Map a physical memory region into a host virtual address.
3612 * May map a subset of the requested range, given by and returned in *plen.
3613 * May return NULL if resources needed to perform the mapping are exhausted.
3614 * Use only for reads OR writes - not for read-modify-write operations.
3615 * Use cpu_register_map_client() to know when retrying the map operation is
3616 * likely to succeed.
3617 */
3618 void *cpu_physical_memory_map(target_phys_addr_t addr,
3619 target_phys_addr_t *plen,
3620 int is_write)
3621 {
3622 target_phys_addr_t len = *plen;
3623 target_phys_addr_t todo = 0;
3624 int l;
3625 target_phys_addr_t page;
3626 MemoryRegionSection *section;
3627 ram_addr_t raddr = RAM_ADDR_MAX;
3628 ram_addr_t rlen;
3629 void *ret;
3630
3631 while (len > 0) {
3632 page = addr & TARGET_PAGE_MASK;
3633 l = (page + TARGET_PAGE_SIZE) - addr;
3634 if (l > len)
3635 l = len;
3636 section = phys_page_find(page >> TARGET_PAGE_BITS);
3637
3638 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3639 if (todo || bounce.buffer) {
3640 break;
3641 }
3642 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3643 bounce.addr = addr;
3644 bounce.len = l;
3645 if (!is_write) {
3646 cpu_physical_memory_read(addr, bounce.buffer, l);
3647 }
3648
3649 *plen = l;
3650 return bounce.buffer;
3651 }
3652 if (!todo) {
3653 raddr = memory_region_get_ram_addr(section->mr)
3654 + memory_region_section_addr(section, addr);
3655 }
3656
3657 len -= l;
3658 addr += l;
3659 todo += l;
3660 }
3661 rlen = todo;
3662 ret = qemu_ram_ptr_length(raddr, &rlen);
3663 *plen = rlen;
3664 return ret;
3665 }
3666
3667 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3668 * Will also mark the memory as dirty if is_write == 1. access_len gives
3669 * the amount of memory that was actually read or written by the caller.
3670 */
3671 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3672 int is_write, target_phys_addr_t access_len)
3673 {
3674 if (buffer != bounce.buffer) {
3675 if (is_write) {
3676 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3677 while (access_len) {
3678 unsigned l;
3679 l = TARGET_PAGE_SIZE;
3680 if (l > access_len)
3681 l = access_len;
3682 invalidate_and_set_dirty(addr1, l);
3683 addr1 += l;
3684 access_len -= l;
3685 }
3686 }
3687 if (xen_enabled()) {
3688 xen_invalidate_map_cache_entry(buffer);
3689 }
3690 return;
3691 }
3692 if (is_write) {
3693 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3694 }
3695 qemu_vfree(bounce.buffer);
3696 bounce.buffer = NULL;
3697 cpu_notify_map_clients();
3698 }
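/* Illustrative sketch (not part of the original file): zero-copy access via
   the map/unmap pair above. The mapping may cover less than the requested
   length, and may fail when the single bounce buffer is busy; a real caller
   would loop over the remainder or register a retry callback with
   cpu_register_map_client(). */
static void example_zero_copy_fill(target_phys_addr_t addr,
                                   target_phys_addr_t len, uint8_t pattern)
{
    target_phys_addr_t plen = len;
    void *ptr = cpu_physical_memory_map(addr, &plen, 1 /* is_write */);

    if (!ptr) {
        return;                             /* resources exhausted for now */
    }
    memset(ptr, pattern, plen);             /* plen may be shorter than len */
    cpu_physical_memory_unmap(ptr, plen, 1, plen);
}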
3699
3700 /* warning: addr must be aligned */
3701 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
3702 enum device_endian endian)
3703 {
3704 uint8_t *ptr;
3705 uint32_t val;
3706 MemoryRegionSection *section;
3707
3708 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3709
3710 if (!(memory_region_is_ram(section->mr) ||
3711 memory_region_is_romd(section->mr))) {
3712 /* I/O case */
3713 addr = memory_region_section_addr(section, addr);
3714 val = io_mem_read(section->mr, addr, 4);
3715 #if defined(TARGET_WORDS_BIGENDIAN)
3716 if (endian == DEVICE_LITTLE_ENDIAN) {
3717 val = bswap32(val);
3718 }
3719 #else
3720 if (endian == DEVICE_BIG_ENDIAN) {
3721 val = bswap32(val);
3722 }
3723 #endif
3724 } else {
3725 /* RAM case */
3726 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3727 & TARGET_PAGE_MASK)
3728 + memory_region_section_addr(section, addr));
3729 switch (endian) {
3730 case DEVICE_LITTLE_ENDIAN:
3731 val = ldl_le_p(ptr);
3732 break;
3733 case DEVICE_BIG_ENDIAN:
3734 val = ldl_be_p(ptr);
3735 break;
3736 default:
3737 val = ldl_p(ptr);
3738 break;
3739 }
3740 }
3741 return val;
3742 }
3743
3744 uint32_t ldl_phys(target_phys_addr_t addr)
3745 {
3746 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3747 }
3748
3749 uint32_t ldl_le_phys(target_phys_addr_t addr)
3750 {
3751 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3752 }
3753
3754 uint32_t ldl_be_phys(target_phys_addr_t addr)
3755 {
3756 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3757 }
3758
3759 /* warning: addr must be aligned */
3760 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
3761 enum device_endian endian)
3762 {
3763 uint8_t *ptr;
3764 uint64_t val;
3765 MemoryRegionSection *section;
3766
3767 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3768
3769 if (!(memory_region_is_ram(section->mr) ||
3770 memory_region_is_romd(section->mr))) {
3771 /* I/O case */
3772 addr = memory_region_section_addr(section, addr);
3773
3774 /* XXX This is broken when device endian != cpu endian.
3775 Fix and add "endian" variable check */
3776 #ifdef TARGET_WORDS_BIGENDIAN
3777 val = io_mem_read(section->mr, addr, 4) << 32;
3778 val |= io_mem_read(section->mr, addr + 4, 4);
3779 #else
3780 val = io_mem_read(section->mr, addr, 4);
3781 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3782 #endif
3783 } else {
3784 /* RAM case */
3785 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3786 & TARGET_PAGE_MASK)
3787 + memory_region_section_addr(section, addr));
3788 switch (endian) {
3789 case DEVICE_LITTLE_ENDIAN:
3790 val = ldq_le_p(ptr);
3791 break;
3792 case DEVICE_BIG_ENDIAN:
3793 val = ldq_be_p(ptr);
3794 break;
3795 default:
3796 val = ldq_p(ptr);
3797 break;
3798 }
3799 }
3800 return val;
3801 }
3802
3803 uint64_t ldq_phys(target_phys_addr_t addr)
3804 {
3805 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3806 }
3807
3808 uint64_t ldq_le_phys(target_phys_addr_t addr)
3809 {
3810 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3811 }
3812
3813 uint64_t ldq_be_phys(target_phys_addr_t addr)
3814 {
3815 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3816 }
3817
3818 /* XXX: optimize */
3819 uint32_t ldub_phys(target_phys_addr_t addr)
3820 {
3821 uint8_t val;
3822 cpu_physical_memory_read(addr, &val, 1);
3823 return val;
3824 }
3825
3826 /* warning: addr must be aligned */
3827 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
3828 enum device_endian endian)
3829 {
3830 uint8_t *ptr;
3831 uint64_t val;
3832 MemoryRegionSection *section;
3833
3834 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3835
3836 if (!(memory_region_is_ram(section->mr) ||
3837 memory_region_is_romd(section->mr))) {
3838 /* I/O case */
3839 addr = memory_region_section_addr(section, addr);
3840 val = io_mem_read(section->mr, addr, 2);
3841 #if defined(TARGET_WORDS_BIGENDIAN)
3842 if (endian == DEVICE_LITTLE_ENDIAN) {
3843 val = bswap16(val);
3844 }
3845 #else
3846 if (endian == DEVICE_BIG_ENDIAN) {
3847 val = bswap16(val);
3848 }
3849 #endif
3850 } else {
3851 /* RAM case */
3852 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3853 & TARGET_PAGE_MASK)
3854 + memory_region_section_addr(section, addr));
3855 switch (endian) {
3856 case DEVICE_LITTLE_ENDIAN:
3857 val = lduw_le_p(ptr);
3858 break;
3859 case DEVICE_BIG_ENDIAN:
3860 val = lduw_be_p(ptr);
3861 break;
3862 default:
3863 val = lduw_p(ptr);
3864 break;
3865 }
3866 }
3867 return val;
3868 }
3869
3870 uint32_t lduw_phys(target_phys_addr_t addr)
3871 {
3872 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3873 }
3874
3875 uint32_t lduw_le_phys(target_phys_addr_t addr)
3876 {
3877 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3878 }
3879
3880 uint32_t lduw_be_phys(target_phys_addr_t addr)
3881 {
3882 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3883 }
3884
3885 /* warning: addr must be aligned. The ram page is not marked as dirty
3886 and the code inside is not invalidated. It is useful if the dirty
3887 bits are used to track modified PTEs */
3888 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3889 {
3890 uint8_t *ptr;
3891 MemoryRegionSection *section;
3892
3893 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3894
3895 if (!memory_region_is_ram(section->mr) || section->readonly) {
3896 addr = memory_region_section_addr(section, addr);
3897 if (memory_region_is_ram(section->mr)) {
3898 section = &phys_sections[phys_section_rom];
3899 }
3900 io_mem_write(section->mr, addr, val, 4);
3901 } else {
3902 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3903 & TARGET_PAGE_MASK)
3904 + memory_region_section_addr(section, addr);
3905 ptr = qemu_get_ram_ptr(addr1);
3906 stl_p(ptr, val);
3907
3908 if (unlikely(in_migration)) {
3909 if (!cpu_physical_memory_is_dirty(addr1)) {
3910 /* invalidate code */
3911 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3912 /* set dirty bit */
3913 cpu_physical_memory_set_dirty_flags(
3914 addr1, (0xff & ~CODE_DIRTY_FLAG));
3915 }
3916 }
3917 }
3918 }
3919
3920 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
3921 {
3922 uint8_t *ptr;
3923 MemoryRegionSection *section;
3924
3925 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3926
3927 if (!memory_region_is_ram(section->mr) || section->readonly) {
3928 addr = memory_region_section_addr(section, addr);
3929 if (memory_region_is_ram(section->mr)) {
3930 section = &phys_sections[phys_section_rom];
3931 }
3932 #ifdef TARGET_WORDS_BIGENDIAN
3933 io_mem_write(section->mr, addr, val >> 32, 4);
3934 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3935 #else
3936 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3937 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3938 #endif
3939 } else {
3940 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3941 & TARGET_PAGE_MASK)
3942 + memory_region_section_addr(section, addr));
3943 stq_p(ptr, val);
3944 }
3945 }
3946
3947 /* warning: addr must be aligned */
3948 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
3949 enum device_endian endian)
3950 {
3951 uint8_t *ptr;
3952 MemoryRegionSection *section;
3953
3954 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3955
3956 if (!memory_region_is_ram(section->mr) || section->readonly) {
3957 addr = memory_region_section_addr(section, addr);
3958 if (memory_region_is_ram(section->mr)) {
3959 section = &phys_sections[phys_section_rom];
3960 }
3961 #if defined(TARGET_WORDS_BIGENDIAN)
3962 if (endian == DEVICE_LITTLE_ENDIAN) {
3963 val = bswap32(val);
3964 }
3965 #else
3966 if (endian == DEVICE_BIG_ENDIAN) {
3967 val = bswap32(val);
3968 }
3969 #endif
3970 io_mem_write(section->mr, addr, val, 4);
3971 } else {
3972 unsigned long addr1;
3973 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3974 + memory_region_section_addr(section, addr);
3975 /* RAM case */
3976 ptr = qemu_get_ram_ptr(addr1);
3977 switch (endian) {
3978 case DEVICE_LITTLE_ENDIAN:
3979 stl_le_p(ptr, val);
3980 break;
3981 case DEVICE_BIG_ENDIAN:
3982 stl_be_p(ptr, val);
3983 break;
3984 default:
3985 stl_p(ptr, val);
3986 break;
3987 }
3988 invalidate_and_set_dirty(addr1, 4);
3989 }
3990 }
3991
3992 void stl_phys(target_phys_addr_t addr, uint32_t val)
3993 {
3994 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3995 }
3996
3997 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
3998 {
3999 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4000 }
4001
4002 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4003 {
4004 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4005 }
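/* Illustrative sketch (not in the original source): a device model writing
   a 32-bit field of an in-guest-memory descriptor whose specification
   defines it as little-endian.  stl_le_phys() performs whatever byte swap
   is needed, so the same code works for any host/target endianness.  The
   descriptor layout and helper name are hypothetical. */
#if 0
static void example_complete_descriptor(target_phys_addr_t desc_addr,
                                        uint32_t status)
{
    /* hypothetical layout: little-endian 32-bit status word at offset 8 */
    stl_le_phys(desc_addr + 8, status);
}
#endif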
4006
4007 /* XXX: optimize */
4008 void stb_phys(target_phys_addr_t addr, uint32_t val)
4009 {
4010 uint8_t v = val;
4011 cpu_physical_memory_write(addr, &v, 1);
4012 }
4013
4014 /* warning: addr must be aligned */
4015 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4016 enum device_endian endian)
4017 {
4018 uint8_t *ptr;
4019 MemoryRegionSection *section;
4020
4021 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4022
4023 if (!memory_region_is_ram(section->mr) || section->readonly) {
4024 addr = memory_region_section_addr(section, addr);
4025 if (memory_region_is_ram(section->mr)) {
4026 section = &phys_sections[phys_section_rom];
4027 }
4028 #if defined(TARGET_WORDS_BIGENDIAN)
4029 if (endian == DEVICE_LITTLE_ENDIAN) {
4030 val = bswap16(val);
4031 }
4032 #else
4033 if (endian == DEVICE_BIG_ENDIAN) {
4034 val = bswap16(val);
4035 }
4036 #endif
4037 io_mem_write(section->mr, addr, val, 2);
4038 } else {
4039 unsigned long addr1;
4040 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4041 + memory_region_section_addr(section, addr);
4042 /* RAM case */
4043 ptr = qemu_get_ram_ptr(addr1);
4044 switch (endian) {
4045 case DEVICE_LITTLE_ENDIAN:
4046 stw_le_p(ptr, val);
4047 break;
4048 case DEVICE_BIG_ENDIAN:
4049 stw_be_p(ptr, val);
4050 break;
4051 default:
4052 stw_p(ptr, val);
4053 break;
4054 }
4055 invalidate_and_set_dirty(addr1, 2);
4056 }
4057 }
4058
4059 void stw_phys(target_phys_addr_t addr, uint32_t val)
4060 {
4061 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4062 }
4063
4064 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4065 {
4066 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4067 }
4068
4069 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4070 {
4071 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4072 }
4073
4074 /* XXX: optimize */
4075 void stq_phys(target_phys_addr_t addr, uint64_t val)
4076 {
4077 val = tswap64(val);
4078 cpu_physical_memory_write(addr, &val, 8);
4079 }
4080
4081 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4082 {
4083 val = cpu_to_le64(val);
4084 cpu_physical_memory_write(addr, &val, 8);
4085 }
4086
4087 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4088 {
4089 val = cpu_to_be64(val);
4090 cpu_physical_memory_write(addr, &val, 8);
4091 }
4092
4093 /* virtual memory access for debug (includes writing to ROM) */
4094 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4095 uint8_t *buf, int len, int is_write)
4096 {
4097 int l;
4098 target_phys_addr_t phys_addr;
4099 target_ulong page;
4100
4101 while (len > 0) {
4102 page = addr & TARGET_PAGE_MASK;
4103 phys_addr = cpu_get_phys_page_debug(env, page);
4104 /* if no physical page mapped, return an error */
4105 if (phys_addr == -1)
4106 return -1;
4107 l = (page + TARGET_PAGE_SIZE) - addr;
4108 if (l > len)
4109 l = len;
4110 phys_addr += (addr & ~TARGET_PAGE_MASK);
4111 if (is_write)
4112 cpu_physical_memory_write_rom(phys_addr, buf, l);
4113 else
4114 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4115 len -= l;
4116 buf += l;
4117 addr += l;
4118 }
4119 return 0;
4120 }
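/* Illustrative sketch (not in the original source): how a debug stub might
   use cpu_memory_rw_debug() to read guest-virtual memory, e.g. to fetch the
   bytes at the current PC for disassembly.  The helper name is
   hypothetical. */
#if 0
static int example_read_guest_bytes(CPUArchState *env, target_ulong vaddr,
                                    uint8_t *buf, int len)
{
    /* returns 0 on success, -1 if any page in the range is unmapped */
    return cpu_memory_rw_debug(env, vaddr, buf, len, 0);
}
#endif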
4121 #endif
4122
4123 /* In deterministic execution (icount) mode, an instruction that performs
4124 device I/O must be the last one in its TB. */
4125 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4126 {
4127 TranslationBlock *tb;
4128 uint32_t n, cflags;
4129 target_ulong pc, cs_base;
4130 uint64_t flags;
4131
4132 tb = tb_find_pc(retaddr);
4133 if (!tb) {
4134 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4135 (void *)retaddr);
4136 }
4137 n = env->icount_decr.u16.low + tb->icount;
4138 cpu_restore_state(tb, env, retaddr);
4139 /* Calculate how many instructions had been executed before the fault
4140 occurred. */
4141 n = n - env->icount_decr.u16.low;
4142 /* Generate a new TB ending on the I/O insn. */
4143 n++;
4144 /* On MIPS and SH, a delay slot instruction can only be restarted if
4145 it was already the first instruction in the TB. If the I/O insn is
4146 not the first instruction in the TB, re-execute the preceding
4147 branch instead. */
4148 #if defined(TARGET_MIPS)
4149 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4150 env->active_tc.PC -= 4;
4151 env->icount_decr.u16.low++;
4152 env->hflags &= ~MIPS_HFLAG_BMASK;
4153 }
4154 #elif defined(TARGET_SH4)
4155 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4156 && n > 1) {
4157 env->pc -= 2;
4158 env->icount_decr.u16.low++;
4159 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4160 }
4161 #endif
4162 /* This should never happen. */
4163 if (n > CF_COUNT_MASK)
4164 cpu_abort(env, "TB too big during recompile");
4165
4166 cflags = n | CF_LAST_IO;
4167 pc = tb->pc;
4168 cs_base = tb->cs_base;
4169 flags = tb->flags;
4170 tb_phys_invalidate(tb, -1);
4171 /* FIXME: In theory this could raise an exception. In practice
4172 we have already translated the block once so it's probably ok. */
4173 tb_gen_code(env, pc, cs_base, flags, cflags);
4174 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4175 the first in the TB) then we end up generating a whole new TB and
4176 repeating the fault, which is horribly inefficient.
4177 Better would be to execute just this insn uncached, or generate a
4178 second new TB. */
4179 cpu_resume_from_signal(env, NULL);
4180 }
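/* Illustrative sketch (not in the original source): the shape of the check
   an I/O accessor makes when icount is enabled.  If the current TB was not
   generated with the I/O instruction as its last one, the accessor bails
   out through cpu_io_recompile(), which regenerates the TB so that it ends
   on that instruction.  The condition shown is a simplified stand-in for
   the real test in the softmmu templates. */
#if 0
static void example_check_io_allowed(CPUArchState *env, uintptr_t retaddr)
{
    if (use_icount && !env->can_do_io) {
        /* does not return: execution restarts from the regenerated TB */
        cpu_io_recompile(env, retaddr);
    }
}
#endif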
4181
4182 #if !defined(CONFIG_USER_ONLY)
4183
4184 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4185 {
4186 int i, target_code_size, max_target_code_size;
4187 int direct_jmp_count, direct_jmp2_count, cross_page;
4188 TranslationBlock *tb;
4189
4190 target_code_size = 0;
4191 max_target_code_size = 0;
4192 cross_page = 0;
4193 direct_jmp_count = 0;
4194 direct_jmp2_count = 0;
4195 for(i = 0; i < nb_tbs; i++) {
4196 tb = &tbs[i];
4197 target_code_size += tb->size;
4198 if (tb->size > max_target_code_size)
4199 max_target_code_size = tb->size;
4200 if (tb->page_addr[1] != -1)
4201 cross_page++;
4202 if (tb->tb_next_offset[0] != 0xffff) {
4203 direct_jmp_count++;
4204 if (tb->tb_next_offset[1] != 0xffff) {
4205 direct_jmp2_count++;
4206 }
4207 }
4208 }
4209 /* XXX: avoid using doubles? */
4210 cpu_fprintf(f, "Translation buffer state:\n");
4211 cpu_fprintf(f, "gen code size %td/%zd\n",
4212 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4213 cpu_fprintf(f, "TB count %d/%d\n",
4214 nb_tbs, code_gen_max_blocks);
4215 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4216 nb_tbs ? target_code_size / nb_tbs : 0,
4217 max_target_code_size);
4218 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4219 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4220 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4221 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4222 cross_page,
4223 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4224 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4225 direct_jmp_count,
4226 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4227 direct_jmp2_count,
4228 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4229 cpu_fprintf(f, "\nStatistics:\n");
4230 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4231 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4232 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4233 tcg_dump_info(f, cpu_fprintf);
4234 }
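/* Illustrative sketch (not in the original source): dump_exec_info() takes
   an opaque stream plus an fprintf-like callback, so it can print either to
   a stdio FILE or to the monitor.  A plain stderr dump could look like
   this. */
#if 0
static void example_dump_jit_stats(void)
{
    dump_exec_info(stderr, fprintf);
}
#endif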
4235
4236 /*
4237 * A helper function for the _utterly broken_ virtio device model to find out
4238 * whether it's running on a big-endian machine. Don't do this at home, kids!
4239 */
4240 bool virtio_is_big_endian(void);
4241 bool virtio_is_big_endian(void)
4242 {
4243 #if defined(TARGET_WORDS_BIGENDIAN)
4244 return true;
4245 #else
4246 return false;
4247 #endif
4248 }
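/* Illustrative sketch (not in the original source): a caller choosing the
   byte order of guest-visible data based on the hack above.  The helper
   name is hypothetical. */
#if 0
static uint32_t example_to_guest_u32(uint32_t val)
{
    return virtio_is_big_endian() ? cpu_to_be32(val) : cpu_to_le32(val);
}
#endif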
4249
4250 #endif
4251
4252 #ifndef CONFIG_USER_ONLY
4253 bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
4254 {
4255 MemoryRegionSection *section;
4256
4257 section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);
4258
4259 return !(memory_region_is_ram(section->mr) ||
4260 memory_region_is_romd(section->mr));
4261 }
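/* Illustrative sketch (not in the original source): a caller that avoids a
   direct-mapping fast path when the target address is backed by an I/O
   region rather than RAM or ROMD.  The function name is hypothetical. */
#if 0
static bool example_can_map_directly(target_phys_addr_t addr)
{
    return !cpu_physical_memory_is_io(addr);
}
#endif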
4262 #endif