]> git.proxmox.com Git - mirror_qemu.git/blob - exec.c
kvm: use separate MemoryListeners for memory and I/O
[mirror_qemu.git] / exec.c
1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
59
60 #include "cputlb.h"
61
62 #include "memory-internal.h"
63
64 //#define DEBUG_TB_INVALIDATE
65 //#define DEBUG_FLUSH
66 //#define DEBUG_UNASSIGNED
67
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
73
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
78
79 #define SMC_BITMAP_USE_THRESHOLD 10
80
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
87
88 #if defined(__arm__) || defined(__sparc__)
89 /* The prologue must be reachable with a direct jump. ARM and Sparc64
90 have limited branch ranges (possibly also PPC) so place it in a
91 section close to code segment. */
92 #define code_gen_section \
93 __attribute__((__section__(".gen_code"))) \
94 __attribute__((aligned (32)))
95 #elif defined(_WIN32) && !defined(_WIN64)
96 #define code_gen_section \
97 __attribute__((aligned (16)))
98 #else
99 #define code_gen_section \
100 __attribute__((aligned (32)))
101 #endif
102
103 uint8_t code_gen_prologue[1024] code_gen_section;
104 static uint8_t *code_gen_buffer;
105 static unsigned long code_gen_buffer_size;
106 /* threshold to flush the translated code buffer */
107 static unsigned long code_gen_buffer_max_size;
108 static uint8_t *code_gen_ptr;
109
110 #if !defined(CONFIG_USER_ONLY)
111 int phys_ram_fd;
112 static int in_migration;
113
114 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
115
116 static MemoryRegion *system_memory;
117 static MemoryRegion *system_io;
118
119 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
120 static MemoryRegion io_mem_subpage_ram;
121
122 #endif
123
124 CPUArchState *first_cpu;
125 /* current CPU in the current thread. It is only valid inside
126 cpu_exec() */
127 DEFINE_TLS(CPUArchState *,cpu_single_env);
128 /* 0 = Do not count executed instructions.
129 1 = Precise instruction counting.
130 2 = Adaptive rate instruction counting. */
131 int use_icount = 0;
132
/* Per-target-page descriptor; arrays of these form the bottom level of
   l1_map (see page_find_alloc()). */
typedef struct PageDesc {
    /* list of TBs intersecting this ram page.  The two low bits of each
       link select which page_next[] slot of the TB continues the list. */
    TranslationBlock *first_tb;
    /* in order to optimize self modifying code, we count the number
       of write invalidations done on a given page; once the count
       reaches SMC_BITMAP_USE_THRESHOLD a code bitmap is built */
    unsigned int code_write_count;
    /* bitmap with one bit per byte of the page (TARGET_PAGE_SIZE / 8
       bytes long), set where translated code lies; NULL when not built */
    uint8_t *code_bitmap;
#if defined(CONFIG_USER_ONLY)
    /* page flags, e.g. PAGE_RESERVED (user-mode emulation only) */
    unsigned long flags;
#endif
} PageDesc;
144
145 /* In system mode we want L1_MAP to be based on ram offsets,
146 while in user mode we want it to be based on virtual addresses. */
147 #if !defined(CONFIG_USER_ONLY)
148 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
149 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
150 #else
151 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
152 #endif
153 #else
154 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
155 #endif
156
157 /* Size of the L2 (and L3, etc) page tables. */
158 #define L2_BITS 10
159 #define L2_SIZE (1 << L2_BITS)
160
161 #define P_L2_LEVELS \
162 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
163
164 /* The bits remaining after N lower levels of page tables. */
165 #define V_L1_BITS_REM \
166 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
167
168 #if V_L1_BITS_REM < 4
169 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
170 #else
171 #define V_L1_BITS V_L1_BITS_REM
172 #endif
173
174 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
175
176 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
177
178 uintptr_t qemu_real_host_page_size;
179 uintptr_t qemu_host_page_size;
180 uintptr_t qemu_host_page_mask;
181
182 /* This is a multi-level map on the virtual address space.
183 The bottom level has pointers to PageDesc. */
184 static void *l1_map[V_L1_SIZE];
185
186 #if !defined(CONFIG_USER_ONLY)
187 typedef struct PhysPageEntry PhysPageEntry;
188
189 static MemoryRegionSection *phys_sections;
190 static unsigned phys_sections_nb, phys_sections_nb_alloc;
191 static uint16_t phys_section_unassigned;
192 static uint16_t phys_section_notdirty;
193 static uint16_t phys_section_rom;
194 static uint16_t phys_section_watch;
195
/* One 16-bit slot of the physical page radix tree. */
struct PhysPageEntry {
    /* 1: 'ptr' indexes phys_sections; 0: 'ptr' indexes phys_map_nodes */
    uint16_t is_leaf : 1;
     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf);
        PHYS_MAP_NODE_NIL marks a child node that is not allocated yet */
    uint16_t ptr : 15;
};
201
202 /* Simple allocator for PhysPageEntry nodes */
203 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
204 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
205
206 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
207
208 /* This is a multi-level map on the physical address space.
209 The bottom level has pointers to MemoryRegionSections. */
210 static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
211
212 static void io_mem_init(void);
213 static void memory_map_init(void);
214
215 static MemoryRegion io_mem_watch;
216 #endif
217
218 /* statistics */
219 static int tb_flush_count;
220 static int tb_phys_invalidate_count;
221
/* Make the host memory range [addr, addr + size) readable, writable and
   executable.  The outcome of the protection change is not checked. */
#ifdef _WIN32
static void map_exec(void *addr, long size)
{
    DWORD previous_protection;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE,
                   &previous_protection);
}
#else
static void map_exec(void *addr, long size)
{
    unsigned long page_size = getpagesize();
    unsigned long page_mask = ~(page_size - 1);
    /* Round the start down and the end up to host page boundaries,
       since mprotect() works on whole pages. */
    unsigned long first = (unsigned long)addr & page_mask;
    unsigned long last = ((unsigned long)addr + size + (page_size - 1))
                         & page_mask;

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
247
/* Determine the host page size and derive qemu_host_page_size and
   qemu_host_page_mask from it.  For BSD user-mode builds, additionally
   walk the mappings of the current process and flag the corresponding
   guest pages as PAGE_RESERVED. */
static void page_init(void)
{
    /* NOTE: we can always suppose that qemu_host_page_size >=
       TARGET_PAGE_SIZE */
#ifdef _WIN32
    {
        SYSTEM_INFO system_info;

        GetSystemInfo(&system_info);
        qemu_real_host_page_size = system_info.dwPageSize;
    }
#else
    qemu_real_host_page_size = getpagesize();
#endif
    /* A non-zero qemu_host_page_size set before this call is kept,
       but never allowed to be smaller than a target page. */
    if (qemu_host_page_size == 0)
        qemu_host_page_size = qemu_real_host_page_size;
    if (qemu_host_page_size < TARGET_PAGE_SIZE)
        qemu_host_page_size = TARGET_PAGE_SIZE;
    qemu_host_page_mask = ~(qemu_host_page_size - 1);

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        /* Enumerate the host mappings through kinfo_getvmmap(). */
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                /* Only mappings whose start is a valid guest address
                   are recorded. */
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        /* End lies outside the guest range: reserve up
                           to the highest address. */
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        /* Fallback: parse the Linux-compat maps file line by line. */
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            do {
                unsigned long startaddr, endaddr;
                int n;

                /* Read "start-end" in hex and discard the rest of the
                   line. */
                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            /* NOTE(review): a line that fails to match does not advance
               the stream, so this loop relies on well-formed input. */
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}
333
334 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
335 {
336 PageDesc *pd;
337 void **lp;
338 int i;
339
340 #if defined(CONFIG_USER_ONLY)
341 /* We can't use g_malloc because it may recurse into a locked mutex. */
342 # define ALLOC(P, SIZE) \
343 do { \
344 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
345 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
346 } while (0)
347 #else
348 # define ALLOC(P, SIZE) \
349 do { P = g_malloc0(SIZE); } while (0)
350 #endif
351
352 /* Level 1. Always allocated. */
353 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
354
355 /* Level 2..N-1. */
356 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
357 void **p = *lp;
358
359 if (p == NULL) {
360 if (!alloc) {
361 return NULL;
362 }
363 ALLOC(p, sizeof(void *) * L2_SIZE);
364 *lp = p;
365 }
366
367 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
368 }
369
370 pd = *lp;
371 if (pd == NULL) {
372 if (!alloc) {
373 return NULL;
374 }
375 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
376 *lp = pd;
377 }
378
379 #undef ALLOC
380
381 return pd + (index & (L2_SIZE - 1));
382 }
383
384 static inline PageDesc *page_find(tb_page_addr_t index)
385 {
386 return page_find_alloc(index, 0);
387 }
388
389 #if !defined(CONFIG_USER_ONLY)
390
391 static void phys_map_node_reserve(unsigned nodes)
392 {
393 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
394 typedef PhysPageEntry Node[L2_SIZE];
395 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
396 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
397 phys_map_nodes_nb + nodes);
398 phys_map_nodes = g_renew(Node, phys_map_nodes,
399 phys_map_nodes_nb_alloc);
400 }
401 }
402
403 static uint16_t phys_map_node_alloc(void)
404 {
405 unsigned i;
406 uint16_t ret;
407
408 ret = phys_map_nodes_nb++;
409 assert(ret != PHYS_MAP_NODE_NIL);
410 assert(ret != phys_map_nodes_nb_alloc);
411 for (i = 0; i < L2_SIZE; ++i) {
412 phys_map_nodes[ret][i].is_leaf = 0;
413 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
414 }
415 return ret;
416 }
417
418 static void phys_map_nodes_reset(void)
419 {
420 phys_map_nodes_nb = 0;
421 }
422
423
424 static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
425 target_phys_addr_t *nb, uint16_t leaf,
426 int level)
427 {
428 PhysPageEntry *p;
429 int i;
430 target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
431
432 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
433 lp->ptr = phys_map_node_alloc();
434 p = phys_map_nodes[lp->ptr];
435 if (level == 0) {
436 for (i = 0; i < L2_SIZE; i++) {
437 p[i].is_leaf = 1;
438 p[i].ptr = phys_section_unassigned;
439 }
440 }
441 } else {
442 p = phys_map_nodes[lp->ptr];
443 }
444 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
445
446 while (*nb && lp < &p[L2_SIZE]) {
447 if ((*index & (step - 1)) == 0 && *nb >= step) {
448 lp->is_leaf = true;
449 lp->ptr = leaf;
450 *index += step;
451 *nb -= step;
452 } else {
453 phys_page_set_level(lp, index, nb, leaf, level - 1);
454 }
455 ++lp;
456 }
457 }
458
459 static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
460 uint16_t leaf)
461 {
462 /* Wildly overreserve - it doesn't matter much. */
463 phys_map_node_reserve(3 * P_L2_LEVELS);
464
465 phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
466 }
467
468 MemoryRegionSection *phys_page_find(target_phys_addr_t index)
469 {
470 PhysPageEntry lp = phys_map;
471 PhysPageEntry *p;
472 int i;
473 uint16_t s_index = phys_section_unassigned;
474
475 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
476 if (lp.ptr == PHYS_MAP_NODE_NIL) {
477 goto not_found;
478 }
479 p = phys_map_nodes[lp.ptr];
480 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
481 }
482
483 s_index = lp.ptr;
484 not_found:
485 return &phys_sections[s_index];
486 }
487
488 bool memory_region_is_unassigned(MemoryRegion *mr)
489 {
490 return mr != &io_mem_ram && mr != &io_mem_rom
491 && mr != &io_mem_notdirty && !mr->rom_device
492 && mr != &io_mem_watch;
493 }
494
495 #define mmap_lock() do { } while(0)
496 #define mmap_unlock() do { } while(0)
497 #endif
498
499 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
500
501 #if defined(CONFIG_USER_ONLY)
502 /* Currently it is not recommended to allocate big chunks of data in
503 user mode. It will change when a dedicated libc will be used */
504 #define USE_STATIC_CODE_GEN_BUFFER
505 #endif
506
507 #ifdef USE_STATIC_CODE_GEN_BUFFER
508 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
509 __attribute__((aligned (CODE_GEN_ALIGN)));
510 #endif
511
/* Allocate the translated-code buffer and the TranslationBlock array.
 *
 * tb_size: requested buffer size in bytes; 0 selects a default.  It is
 *          ignored when USE_STATIC_CODE_GEN_BUFFER is defined.
 *
 * On return code_gen_buffer, code_gen_buffer_size,
 * code_gen_buffer_max_size, code_gen_max_blocks and tbs are set, and
 * both the buffer and code_gen_prologue have been passed through an
 * executable mapping (mmap with PROT_EXEC or map_exec()).  The process
 * exits if the mmap() of the buffer fails.
 */
static void code_gen_alloc(unsigned long tb_size)
{
#ifdef USE_STATIC_CODE_GEN_BUFFER
    code_gen_buffer = static_code_gen_buffer;
    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
    map_exec(code_gen_buffer, code_gen_buffer_size);
#else
    code_gen_buffer_size = tb_size;
    if (code_gen_buffer_size == 0) {
#if defined(CONFIG_USER_ONLY)
        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
#else
        /* XXX: needs adjustments */
        code_gen_buffer_size = (unsigned long)(ram_size / 4);
#endif
    }
    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
    /* The code gen buffer location may have constraints depending on
       the host cpu and OS */
#if defined(__linux__)
    {
        int flags;
        void *start = NULL;

        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        flags |= MAP_32BIT;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc__) && HOST_LONG_BITS == 64
        // Map the buffer below 2G, so we can use direct calls and branches
        start = (void *) 0x40000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024))
            code_gen_buffer_size = (512 * 1024 * 1024);
#elif defined(__arm__)
        /* Keep the buffer no bigger than 16MB to branch between blocks */
        if (code_gen_buffer_size > 16 * 1024 * 1024)
            code_gen_buffer_size = 16 * 1024 * 1024;
#elif defined(__s390x__)
        /* Map the buffer so that we can use direct calls and branches.  */
        /* We have a +- 4GB range on the branches; leave some slop.  */
        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
        }
        start = (void *)0x90000000UL;
#endif
        /* Without MAP_FIXED, 'start' is only a placement hint here. */
        code_gen_buffer = mmap(start, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
    || defined(__DragonFly__) || defined(__OpenBSD__) \
    || defined(__NetBSD__)
    {
        int flags;
        void *addr = NULL;
        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
         * 0x40000000 is free */
        flags |= MAP_FIXED;
        addr = (void *)0x40000000;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc__) && HOST_LONG_BITS == 64
        // Map the buffer below 2G, so we can use direct calls and branches
        addr = (void *) 0x40000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
            code_gen_buffer_size = (512 * 1024 * 1024);
        }
#endif
        code_gen_buffer = mmap(addr, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#else
    /* Other hosts: heap-allocate, then change the page protection. */
    code_gen_buffer = g_malloc(code_gen_buffer_size);
    map_exec(code_gen_buffer, code_gen_buffer_size);
#endif
#endif /* !USE_STATIC_CODE_GEN_BUFFER */
    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
    /* tb_alloc() refuses new blocks once code_gen_ptr passes this
       threshold, which sits TCG_MAX_OP_SIZE * OPC_BUF_SIZE bytes before
       the end of the buffer. */
    code_gen_buffer_max_size = code_gen_buffer_size -
        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
}
609
610 /* Must be called before using the QEMU cpus. 'tb_size' is the size
611 (in bytes) allocated to the translation buffer. Zero means default
612 size. */
613 void tcg_exec_init(unsigned long tb_size)
614 {
615 cpu_gen_init();
616 code_gen_alloc(tb_size);
617 code_gen_ptr = code_gen_buffer;
618 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
619 page_init();
620 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
621 /* There's no guest base to take into account, so go ahead and
622 initialize the prologue now. */
623 tcg_prologue_init(&tcg_ctx);
624 #endif
625 }
626
627 bool tcg_enabled(void)
628 {
629 return code_gen_buffer != NULL;
630 }
631
/* Initialize the memory map and the I/O memory regions.  Does nothing
   in user-mode builds. */
void cpu_exec_init_all(void)
{
#ifndef CONFIG_USER_ONLY
    memory_map_init();
    io_mem_init();
#endif
}
639
640 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
641
642 static int cpu_common_post_load(void *opaque, int version_id)
643 {
644 CPUArchState *env = opaque;
645
646 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
647 version_id is increased. */
648 env->interrupt_request &= ~0x01;
649 tlb_flush(env, 1);
650
651 return 0;
652 }
653
/* Migration description of the CPU state shared by all targets: the
   'halted' and 'interrupt_request' fields of CPUArchState.
   cpu_common_post_load() runs after the fields have been loaded. */
static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUArchState),
        VMSTATE_UINT32(interrupt_request, CPUArchState),
        VMSTATE_END_OF_LIST()
    }
};
666 #endif
667
668 CPUArchState *qemu_get_cpu(int cpu)
669 {
670 CPUArchState *env = first_cpu;
671
672 while (env) {
673 if (env->cpu_index == cpu)
674 break;
675 env = env->next_cpu;
676 }
677
678 return env;
679 }
680
681 void cpu_exec_init(CPUArchState *env)
682 {
683 CPUArchState **penv;
684 int cpu_index;
685
686 #if defined(CONFIG_USER_ONLY)
687 cpu_list_lock();
688 #endif
689 env->next_cpu = NULL;
690 penv = &first_cpu;
691 cpu_index = 0;
692 while (*penv != NULL) {
693 penv = &(*penv)->next_cpu;
694 cpu_index++;
695 }
696 env->cpu_index = cpu_index;
697 env->numa_node = 0;
698 QTAILQ_INIT(&env->breakpoints);
699 QTAILQ_INIT(&env->watchpoints);
700 #ifndef CONFIG_USER_ONLY
701 env->thread_id = qemu_get_thread_id();
702 #endif
703 *penv = env;
704 #if defined(CONFIG_USER_ONLY)
705 cpu_list_unlock();
706 #endif
707 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
708 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
709 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
710 cpu_save, cpu_load, env);
711 #endif
712 }
713
714 /* Allocate a new translation block. Flush the translation buffer if
715 too many translation blocks or too much generated code. */
716 static TranslationBlock *tb_alloc(target_ulong pc)
717 {
718 TranslationBlock *tb;
719
720 if (nb_tbs >= code_gen_max_blocks ||
721 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
722 return NULL;
723 tb = &tbs[nb_tbs++];
724 tb->pc = pc;
725 tb->cflags = 0;
726 return tb;
727 }
728
729 void tb_free(TranslationBlock *tb)
730 {
731 /* In practice this is mostly used for single use temporary TB
732 Ignore the hard cases and just back up if this TB happens to
733 be the last one generated. */
734 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
735 code_gen_ptr = tb->tc_ptr;
736 nb_tbs--;
737 }
738 }
739
740 static inline void invalidate_page_bitmap(PageDesc *p)
741 {
742 if (p->code_bitmap) {
743 g_free(p->code_bitmap);
744 p->code_bitmap = NULL;
745 }
746 p->code_write_count = 0;
747 }
748
749 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
750
751 static void page_flush_tb_1 (int level, void **lp)
752 {
753 int i;
754
755 if (*lp == NULL) {
756 return;
757 }
758 if (level == 0) {
759 PageDesc *pd = *lp;
760 for (i = 0; i < L2_SIZE; ++i) {
761 pd[i].first_tb = NULL;
762 invalidate_page_bitmap(pd + i);
763 }
764 } else {
765 void **pp = *lp;
766 for (i = 0; i < L2_SIZE; ++i) {
767 page_flush_tb_1 (level - 1, pp + i);
768 }
769 }
770 }
771
772 static void page_flush_tb(void)
773 {
774 int i;
775 for (i = 0; i < V_L1_SIZE; i++) {
776 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
777 }
778 }
779
780 /* flush all the translation blocks */
781 /* XXX: tb_flush is currently not thread safe */
782 void tb_flush(CPUArchState *env1)
783 {
784 CPUArchState *env;
785 #if defined(DEBUG_FLUSH)
786 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
787 (unsigned long)(code_gen_ptr - code_gen_buffer),
788 nb_tbs, nb_tbs > 0 ?
789 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
790 #endif
791 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
792 cpu_abort(env1, "Internal error: code buffer overflow\n");
793
794 nb_tbs = 0;
795
796 for(env = first_cpu; env != NULL; env = env->next_cpu) {
797 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
798 }
799
800 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
801 page_flush_tb();
802
803 code_gen_ptr = code_gen_buffer;
804 /* XXX: flush processor icache at this point if cache flush is
805 expensive */
806 tb_flush_count++;
807 }
808
809 #ifdef DEBUG_TB_CHECK
810
811 static void tb_invalidate_check(target_ulong address)
812 {
813 TranslationBlock *tb;
814 int i;
815 address &= TARGET_PAGE_MASK;
816 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
817 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
818 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
819 address >= tb->pc + tb->size)) {
820 printf("ERROR invalidate: address=" TARGET_FMT_lx
821 " PC=%08lx size=%04x\n",
822 address, (long)tb->pc, tb->size);
823 }
824 }
825 }
826 }
827
828 /* verify that all the pages have correct rights for code */
829 static void tb_page_check(void)
830 {
831 TranslationBlock *tb;
832 int i, flags1, flags2;
833
834 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
835 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
836 flags1 = page_get_flags(tb->pc);
837 flags2 = page_get_flags(tb->pc + tb->size - 1);
838 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
839 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
840 (long)tb->pc, tb->size, flags1, flags2);
841 }
842 }
843 }
844 }
845
846 #endif
847
848 /* invalidate one TB */
849 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
850 int next_offset)
851 {
852 TranslationBlock *tb1;
853 for(;;) {
854 tb1 = *ptb;
855 if (tb1 == tb) {
856 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
857 break;
858 }
859 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
860 }
861 }
862
863 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
864 {
865 TranslationBlock *tb1;
866 unsigned int n1;
867
868 for(;;) {
869 tb1 = *ptb;
870 n1 = (uintptr_t)tb1 & 3;
871 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
872 if (tb1 == tb) {
873 *ptb = tb1->page_next[n1];
874 break;
875 }
876 ptb = &tb1->page_next[n1];
877 }
878 }
879
880 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
881 {
882 TranslationBlock *tb1, **ptb;
883 unsigned int n1;
884
885 ptb = &tb->jmp_next[n];
886 tb1 = *ptb;
887 if (tb1) {
888 /* find tb(n) in circular list */
889 for(;;) {
890 tb1 = *ptb;
891 n1 = (uintptr_t)tb1 & 3;
892 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
893 if (n1 == n && tb1 == tb)
894 break;
895 if (n1 == 2) {
896 ptb = &tb1->jmp_first;
897 } else {
898 ptb = &tb1->jmp_next[n1];
899 }
900 }
901 /* now we can suppress tb(n) from the list */
902 *ptb = tb->jmp_next[n];
903
904 tb->jmp_next[n] = NULL;
905 }
906 }
907
908 /* reset the jump entry 'n' of a TB so that it is not chained to
909 another TB */
910 static inline void tb_reset_jump(TranslationBlock *tb, int n)
911 {
912 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
913 }
914
915 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
916 {
917 CPUArchState *env;
918 PageDesc *p;
919 unsigned int h, n1;
920 tb_page_addr_t phys_pc;
921 TranslationBlock *tb1, *tb2;
922
923 /* remove the TB from the hash list */
924 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
925 h = tb_phys_hash_func(phys_pc);
926 tb_remove(&tb_phys_hash[h], tb,
927 offsetof(TranslationBlock, phys_hash_next));
928
929 /* remove the TB from the page list */
930 if (tb->page_addr[0] != page_addr) {
931 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
932 tb_page_remove(&p->first_tb, tb);
933 invalidate_page_bitmap(p);
934 }
935 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
936 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
937 tb_page_remove(&p->first_tb, tb);
938 invalidate_page_bitmap(p);
939 }
940
941 tb_invalidated_flag = 1;
942
943 /* remove the TB from the hash list */
944 h = tb_jmp_cache_hash_func(tb->pc);
945 for(env = first_cpu; env != NULL; env = env->next_cpu) {
946 if (env->tb_jmp_cache[h] == tb)
947 env->tb_jmp_cache[h] = NULL;
948 }
949
950 /* suppress this TB from the two jump lists */
951 tb_jmp_remove(tb, 0);
952 tb_jmp_remove(tb, 1);
953
954 /* suppress any remaining jumps to this TB */
955 tb1 = tb->jmp_first;
956 for(;;) {
957 n1 = (uintptr_t)tb1 & 3;
958 if (n1 == 2)
959 break;
960 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
961 tb2 = tb1->jmp_next[n1];
962 tb_reset_jump(tb1, n1);
963 tb1->jmp_next[n1] = NULL;
964 tb1 = tb2;
965 }
966 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
967
968 tb_phys_invalidate_count++;
969 }
970
/* Set 'len' consecutive bits of the bitmap 'tab', starting at bit index
   'start'.  Bit k lives in byte k / 8 at position k % 8 (least
   significant bit first).  Nothing is written when 'len' is 0. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    const int end = start + len;
    uint8_t *byte = tab + (start >> 3);
    int head_mask = 0xff << (start & 7);
    int pos, last_full;

    /* Range confined to a single byte. */
    if ((start & ~7) == (end & ~7)) {
        if (start < end) {
            *byte |= head_mask & ~(0xff << (end & 7));
        }
        return;
    }

    /* Leading partial byte. */
    *byte++ |= head_mask;

    /* Whole bytes in the middle. */
    last_full = end & ~7;
    for (pos = (start + 8) & ~7; pos < last_full; pos += 8) {
        *byte++ = 0xff;
    }

    /* Trailing partial byte, if any. */
    if (pos < end) {
        *byte |= ~(0xff << (end & 7));
    }
}
997
998 static void build_page_bitmap(PageDesc *p)
999 {
1000 int n, tb_start, tb_end;
1001 TranslationBlock *tb;
1002
1003 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1004
1005 tb = p->first_tb;
1006 while (tb != NULL) {
1007 n = (uintptr_t)tb & 3;
1008 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1009 /* NOTE: this is subtle as a TB may span two physical pages */
1010 if (n == 0) {
1011 /* NOTE: tb_end may be after the end of the page, but
1012 it is not a problem */
1013 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1014 tb_end = tb_start + tb->size;
1015 if (tb_end > TARGET_PAGE_SIZE)
1016 tb_end = TARGET_PAGE_SIZE;
1017 } else {
1018 tb_start = 0;
1019 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1020 }
1021 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1022 tb = tb->page_next[n];
1023 }
1024 }
1025
1026 TranslationBlock *tb_gen_code(CPUArchState *env,
1027 target_ulong pc, target_ulong cs_base,
1028 int flags, int cflags)
1029 {
1030 TranslationBlock *tb;
1031 uint8_t *tc_ptr;
1032 tb_page_addr_t phys_pc, phys_page2;
1033 target_ulong virt_page2;
1034 int code_gen_size;
1035
1036 phys_pc = get_page_addr_code(env, pc);
1037 tb = tb_alloc(pc);
1038 if (!tb) {
1039 /* flush must be done */
1040 tb_flush(env);
1041 /* cannot fail at this point */
1042 tb = tb_alloc(pc);
1043 /* Don't forget to invalidate previous TB info. */
1044 tb_invalidated_flag = 1;
1045 }
1046 tc_ptr = code_gen_ptr;
1047 tb->tc_ptr = tc_ptr;
1048 tb->cs_base = cs_base;
1049 tb->flags = flags;
1050 tb->cflags = cflags;
1051 cpu_gen_code(env, tb, &code_gen_size);
1052 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1053 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1054
1055 /* check next page if needed */
1056 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1057 phys_page2 = -1;
1058 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1059 phys_page2 = get_page_addr_code(env, virt_page2);
1060 }
1061 tb_link_page(tb, phys_pc, phys_page2);
1062 return tb;
1063 }
1064
1065 /*
1066 * Invalidate all TBs which intersect with the target physical address range
1067 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1068 * 'is_cpu_write_access' should be true if called from a real cpu write
1069 * access: the virtual CPU will exit the current TB if code is modified inside
1070 * this TB.
1071 */
1072 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1073 int is_cpu_write_access)
1074 {
1075 while (start < end) {
1076 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1077 start &= TARGET_PAGE_MASK;
1078 start += TARGET_PAGE_SIZE;
1079 }
1080 }
1081
1082 /*
1083 * Invalidate all TBs which intersect with the target physical address range
1084 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1085 * 'is_cpu_write_access' should be true if called from a real cpu write
1086 * access: the virtual CPU will exit the current TB if code is modified inside
1087 * this TB.
1088 */
1089 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1090 int is_cpu_write_access)
1091 {
1092 TranslationBlock *tb, *tb_next, *saved_tb;
1093 CPUArchState *env = cpu_single_env;
1094 tb_page_addr_t tb_start, tb_end;
1095 PageDesc *p;
1096 int n;
1097 #ifdef TARGET_HAS_PRECISE_SMC
1098 int current_tb_not_found = is_cpu_write_access;
1099 TranslationBlock *current_tb = NULL;
1100 int current_tb_modified = 0;
1101 target_ulong current_pc = 0;
1102 target_ulong current_cs_base = 0;
1103 int current_flags = 0;
1104 #endif /* TARGET_HAS_PRECISE_SMC */
1105
1106 p = page_find(start >> TARGET_PAGE_BITS);
1107 if (!p)
1108 return;
1109 if (!p->code_bitmap &&
1110 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1111 is_cpu_write_access) {
1112 /* build code bitmap */
1113 build_page_bitmap(p);
1114 }
1115
1116 /* we remove all the TBs in the range [start, end[ */
1117 /* XXX: see if in some cases it could be faster to invalidate all the code */
1118 tb = p->first_tb;
1119 while (tb != NULL) {
1120 n = (uintptr_t)tb & 3;
1121 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1122 tb_next = tb->page_next[n];
1123 /* NOTE: this is subtle as a TB may span two physical pages */
1124 if (n == 0) {
1125 /* NOTE: tb_end may be after the end of the page, but
1126 it is not a problem */
1127 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1128 tb_end = tb_start + tb->size;
1129 } else {
1130 tb_start = tb->page_addr[1];
1131 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1132 }
1133 if (!(tb_end <= start || tb_start >= end)) {
1134 #ifdef TARGET_HAS_PRECISE_SMC
1135 if (current_tb_not_found) {
1136 current_tb_not_found = 0;
1137 current_tb = NULL;
1138 if (env->mem_io_pc) {
1139 /* now we have a real cpu fault */
1140 current_tb = tb_find_pc(env->mem_io_pc);
1141 }
1142 }
1143 if (current_tb == tb &&
1144 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1145 /* If we are modifying the current TB, we must stop
1146 its execution. We could be more precise by checking
1147 that the modification is after the current PC, but it
1148 would require a specialized function to partially
1149 restore the CPU state */
1150
1151 current_tb_modified = 1;
1152 cpu_restore_state(current_tb, env, env->mem_io_pc);
1153 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1154 &current_flags);
1155 }
1156 #endif /* TARGET_HAS_PRECISE_SMC */
1157 /* we need to do that to handle the case where a signal
1158 occurs while doing tb_phys_invalidate() */
1159 saved_tb = NULL;
1160 if (env) {
1161 saved_tb = env->current_tb;
1162 env->current_tb = NULL;
1163 }
1164 tb_phys_invalidate(tb, -1);
1165 if (env) {
1166 env->current_tb = saved_tb;
1167 if (env->interrupt_request && env->current_tb)
1168 cpu_interrupt(env, env->interrupt_request);
1169 }
1170 }
1171 tb = tb_next;
1172 }
1173 #if !defined(CONFIG_USER_ONLY)
1174 /* if no code remaining, no need to continue to use slow writes */
1175 if (!p->first_tb) {
1176 invalidate_page_bitmap(p);
1177 if (is_cpu_write_access) {
1178 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1179 }
1180 }
1181 #endif
1182 #ifdef TARGET_HAS_PRECISE_SMC
1183 if (current_tb_modified) {
1184 /* we generate a block containing just the instruction
1185 modifying the memory. It will ensure that it cannot modify
1186 itself */
1187 env->current_tb = NULL;
1188 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1189 cpu_resume_from_signal(env, NULL);
1190 }
1191 #endif
1192 }
1193
1194 /* len must be <= 8 and start must be a multiple of len */
1195 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1196 {
1197 PageDesc *p;
1198 int offset, b;
1199 #if 0
1200 if (1) {
1201 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1202 cpu_single_env->mem_io_vaddr, len,
1203 cpu_single_env->eip,
1204 cpu_single_env->eip +
1205 (intptr_t)cpu_single_env->segs[R_CS].base);
1206 }
1207 #endif
1208 p = page_find(start >> TARGET_PAGE_BITS);
1209 if (!p)
1210 return;
1211 if (p->code_bitmap) {
1212 offset = start & ~TARGET_PAGE_MASK;
1213 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1214 if (b & ((1 << len) - 1))
1215 goto do_invalidate;
1216 } else {
1217 do_invalidate:
1218 tb_invalidate_phys_page_range(start, start + len, 1);
1219 }
1220 }
1221
1222 #if !defined(CONFIG_SOFTMMU)
1223 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1224 uintptr_t pc, void *puc)
1225 {
1226 TranslationBlock *tb;
1227 PageDesc *p;
1228 int n;
1229 #ifdef TARGET_HAS_PRECISE_SMC
1230 TranslationBlock *current_tb = NULL;
1231 CPUArchState *env = cpu_single_env;
1232 int current_tb_modified = 0;
1233 target_ulong current_pc = 0;
1234 target_ulong current_cs_base = 0;
1235 int current_flags = 0;
1236 #endif
1237
1238 addr &= TARGET_PAGE_MASK;
1239 p = page_find(addr >> TARGET_PAGE_BITS);
1240 if (!p)
1241 return;
1242 tb = p->first_tb;
1243 #ifdef TARGET_HAS_PRECISE_SMC
1244 if (tb && pc != 0) {
1245 current_tb = tb_find_pc(pc);
1246 }
1247 #endif
1248 while (tb != NULL) {
1249 n = (uintptr_t)tb & 3;
1250 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1251 #ifdef TARGET_HAS_PRECISE_SMC
1252 if (current_tb == tb &&
1253 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1254 /* If we are modifying the current TB, we must stop
1255 its execution. We could be more precise by checking
1256 that the modification is after the current PC, but it
1257 would require a specialized function to partially
1258 restore the CPU state */
1259
1260 current_tb_modified = 1;
1261 cpu_restore_state(current_tb, env, pc);
1262 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1263 &current_flags);
1264 }
1265 #endif /* TARGET_HAS_PRECISE_SMC */
1266 tb_phys_invalidate(tb, addr);
1267 tb = tb->page_next[n];
1268 }
1269 p->first_tb = NULL;
1270 #ifdef TARGET_HAS_PRECISE_SMC
1271 if (current_tb_modified) {
1272 /* we generate a block containing just the instruction
1273 modifying the memory. It will ensure that it cannot modify
1274 itself */
1275 env->current_tb = NULL;
1276 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1277 cpu_resume_from_signal(env, puc);
1278 }
1279 #endif
1280 }
1281 #endif
1282
1283 /* add the tb in the target page and protect it if necessary */
1284 static inline void tb_alloc_page(TranslationBlock *tb,
1285 unsigned int n, tb_page_addr_t page_addr)
1286 {
1287 PageDesc *p;
1288 #ifndef CONFIG_USER_ONLY
1289 bool page_already_protected;
1290 #endif
1291
1292 tb->page_addr[n] = page_addr;
1293 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1294 tb->page_next[n] = p->first_tb;
1295 #ifndef CONFIG_USER_ONLY
1296 page_already_protected = p->first_tb != NULL;
1297 #endif
1298 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1299 invalidate_page_bitmap(p);
1300
1301 #if defined(TARGET_HAS_SMC) || 1
1302
1303 #if defined(CONFIG_USER_ONLY)
1304 if (p->flags & PAGE_WRITE) {
1305 target_ulong addr;
1306 PageDesc *p2;
1307 int prot;
1308
1309 /* force the host page as non writable (writes will have a
1310 page fault + mprotect overhead) */
1311 page_addr &= qemu_host_page_mask;
1312 prot = 0;
1313 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1314 addr += TARGET_PAGE_SIZE) {
1315
1316 p2 = page_find (addr >> TARGET_PAGE_BITS);
1317 if (!p2)
1318 continue;
1319 prot |= p2->flags;
1320 p2->flags &= ~PAGE_WRITE;
1321 }
1322 mprotect(g2h(page_addr), qemu_host_page_size,
1323 (prot & PAGE_BITS) & ~PAGE_WRITE);
1324 #ifdef DEBUG_TB_INVALIDATE
1325 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1326 page_addr);
1327 #endif
1328 }
1329 #else
1330 /* if some code is already present, then the pages are already
1331 protected. So we handle the case where only the first TB is
1332 allocated in a physical page */
1333 if (!page_already_protected) {
1334 tlb_protect_code(page_addr);
1335 }
1336 #endif
1337
1338 #endif /* TARGET_HAS_SMC */
1339 }
1340
1341 /* add a new TB and link it to the physical page tables. phys_page2 is
1342 (-1) to indicate that only one page contains the TB. */
1343 void tb_link_page(TranslationBlock *tb,
1344 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1345 {
1346 unsigned int h;
1347 TranslationBlock **ptb;
1348
1349 /* Grab the mmap lock to stop another thread invalidating this TB
1350 before we are done. */
1351 mmap_lock();
1352 /* add in the physical hash table */
1353 h = tb_phys_hash_func(phys_pc);
1354 ptb = &tb_phys_hash[h];
1355 tb->phys_hash_next = *ptb;
1356 *ptb = tb;
1357
1358 /* add in the page list */
1359 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1360 if (phys_page2 != -1)
1361 tb_alloc_page(tb, 1, phys_page2);
1362 else
1363 tb->page_addr[1] = -1;
1364
1365 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1366 tb->jmp_next[0] = NULL;
1367 tb->jmp_next[1] = NULL;
1368
1369 /* init original jump addresses */
1370 if (tb->tb_next_offset[0] != 0xffff)
1371 tb_reset_jump(tb, 0);
1372 if (tb->tb_next_offset[1] != 0xffff)
1373 tb_reset_jump(tb, 1);
1374
1375 #ifdef DEBUG_TB_CHECK
1376 tb_page_check();
1377 #endif
1378 mmap_unlock();
1379 }
1380
1381 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1382 tb[1].tc_ptr. Return NULL if not found */
1383 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1384 {
1385 int m_min, m_max, m;
1386 uintptr_t v;
1387 TranslationBlock *tb;
1388
1389 if (nb_tbs <= 0)
1390 return NULL;
1391 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1392 tc_ptr >= (uintptr_t)code_gen_ptr) {
1393 return NULL;
1394 }
1395 /* binary search (cf Knuth) */
1396 m_min = 0;
1397 m_max = nb_tbs - 1;
1398 while (m_min <= m_max) {
1399 m = (m_min + m_max) >> 1;
1400 tb = &tbs[m];
1401 v = (uintptr_t)tb->tc_ptr;
1402 if (v == tc_ptr)
1403 return tb;
1404 else if (tc_ptr < v) {
1405 m_max = m - 1;
1406 } else {
1407 m_min = m + 1;
1408 }
1409 }
1410 return &tbs[m_max];
1411 }
1412
1413 static void tb_reset_jump_recursive(TranslationBlock *tb);
1414
1415 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1416 {
1417 TranslationBlock *tb1, *tb_next, **ptb;
1418 unsigned int n1;
1419
1420 tb1 = tb->jmp_next[n];
1421 if (tb1 != NULL) {
1422 /* find head of list */
1423 for(;;) {
1424 n1 = (uintptr_t)tb1 & 3;
1425 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1426 if (n1 == 2)
1427 break;
1428 tb1 = tb1->jmp_next[n1];
1429 }
1430 /* we are now sure now that tb jumps to tb1 */
1431 tb_next = tb1;
1432
1433 /* remove tb from the jmp_first list */
1434 ptb = &tb_next->jmp_first;
1435 for(;;) {
1436 tb1 = *ptb;
1437 n1 = (uintptr_t)tb1 & 3;
1438 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1439 if (n1 == n && tb1 == tb)
1440 break;
1441 ptb = &tb1->jmp_next[n1];
1442 }
1443 *ptb = tb->jmp_next[n];
1444 tb->jmp_next[n] = NULL;
1445
1446 /* suppress the jump to next tb in generated code */
1447 tb_reset_jump(tb, n);
1448
1449 /* suppress jumps in the tb on which we could have jumped */
1450 tb_reset_jump_recursive(tb_next);
1451 }
1452 }
1453
1454 static void tb_reset_jump_recursive(TranslationBlock *tb)
1455 {
1456 tb_reset_jump_recursive2(tb, 0);
1457 tb_reset_jump_recursive2(tb, 1);
1458 }
1459
1460 #if defined(TARGET_HAS_ICE)
1461 #if defined(CONFIG_USER_ONLY)
1462 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1463 {
1464 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1465 }
1466 #else
1467 void tb_invalidate_phys_addr(target_phys_addr_t addr)
1468 {
1469 ram_addr_t ram_addr;
1470 MemoryRegionSection *section;
1471
1472 section = phys_page_find(addr >> TARGET_PAGE_BITS);
1473 if (!(memory_region_is_ram(section->mr)
1474 || (section->mr->rom_device && section->mr->readable))) {
1475 return;
1476 }
1477 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1478 + memory_region_section_addr(section, addr);
1479 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1480 }
1481
1482 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1483 {
1484 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1485 (pc & ~TARGET_PAGE_MASK));
1486 }
1487 #endif
1488 #endif /* TARGET_HAS_ICE */
1489
1490 #if defined(CONFIG_USER_ONLY)
1491 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1492
1493 {
1494 }
1495
1496 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1497 int flags, CPUWatchpoint **watchpoint)
1498 {
1499 return -ENOSYS;
1500 }
1501 #else
1502 /* Add a watchpoint. */
1503 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1504 int flags, CPUWatchpoint **watchpoint)
1505 {
1506 target_ulong len_mask = ~(len - 1);
1507 CPUWatchpoint *wp;
1508
1509 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1510 if ((len & (len - 1)) || (addr & ~len_mask) ||
1511 len == 0 || len > TARGET_PAGE_SIZE) {
1512 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1513 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1514 return -EINVAL;
1515 }
1516 wp = g_malloc(sizeof(*wp));
1517
1518 wp->vaddr = addr;
1519 wp->len_mask = len_mask;
1520 wp->flags = flags;
1521
1522 /* keep all GDB-injected watchpoints in front */
1523 if (flags & BP_GDB)
1524 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1525 else
1526 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1527
1528 tlb_flush_page(env, addr);
1529
1530 if (watchpoint)
1531 *watchpoint = wp;
1532 return 0;
1533 }
1534
1535 /* Remove a specific watchpoint. */
1536 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1537 int flags)
1538 {
1539 target_ulong len_mask = ~(len - 1);
1540 CPUWatchpoint *wp;
1541
1542 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1543 if (addr == wp->vaddr && len_mask == wp->len_mask
1544 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1545 cpu_watchpoint_remove_by_ref(env, wp);
1546 return 0;
1547 }
1548 }
1549 return -ENOENT;
1550 }
1551
1552 /* Remove a specific watchpoint by reference. */
1553 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1554 {
1555 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1556
1557 tlb_flush_page(env, watchpoint->vaddr);
1558
1559 g_free(watchpoint);
1560 }
1561
1562 /* Remove all matching watchpoints. */
1563 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1564 {
1565 CPUWatchpoint *wp, *next;
1566
1567 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1568 if (wp->flags & mask)
1569 cpu_watchpoint_remove_by_ref(env, wp);
1570 }
1571 }
1572 #endif
1573
1574 /* Add a breakpoint. */
1575 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1576 CPUBreakpoint **breakpoint)
1577 {
1578 #if defined(TARGET_HAS_ICE)
1579 CPUBreakpoint *bp;
1580
1581 bp = g_malloc(sizeof(*bp));
1582
1583 bp->pc = pc;
1584 bp->flags = flags;
1585
1586 /* keep all GDB-injected breakpoints in front */
1587 if (flags & BP_GDB)
1588 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1589 else
1590 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1591
1592 breakpoint_invalidate(env, pc);
1593
1594 if (breakpoint)
1595 *breakpoint = bp;
1596 return 0;
1597 #else
1598 return -ENOSYS;
1599 #endif
1600 }
1601
1602 /* Remove a specific breakpoint. */
1603 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1604 {
1605 #if defined(TARGET_HAS_ICE)
1606 CPUBreakpoint *bp;
1607
1608 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1609 if (bp->pc == pc && bp->flags == flags) {
1610 cpu_breakpoint_remove_by_ref(env, bp);
1611 return 0;
1612 }
1613 }
1614 return -ENOENT;
1615 #else
1616 return -ENOSYS;
1617 #endif
1618 }
1619
1620 /* Remove a specific breakpoint by reference. */
1621 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1622 {
1623 #if defined(TARGET_HAS_ICE)
1624 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1625
1626 breakpoint_invalidate(env, breakpoint->pc);
1627
1628 g_free(breakpoint);
1629 #endif
1630 }
1631
1632 /* Remove all matching breakpoints. */
1633 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1634 {
1635 #if defined(TARGET_HAS_ICE)
1636 CPUBreakpoint *bp, *next;
1637
1638 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1639 if (bp->flags & mask)
1640 cpu_breakpoint_remove_by_ref(env, bp);
1641 }
1642 #endif
1643 }
1644
1645 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1646 CPU loop after each instruction */
1647 void cpu_single_step(CPUArchState *env, int enabled)
1648 {
1649 #if defined(TARGET_HAS_ICE)
1650 if (env->singlestep_enabled != enabled) {
1651 env->singlestep_enabled = enabled;
1652 if (kvm_enabled())
1653 kvm_update_guest_debug(env, 0);
1654 else {
1655 /* must flush all the translated code to avoid inconsistencies */
1656 /* XXX: only flush what is necessary */
1657 tb_flush(env);
1658 }
1659 }
1660 #endif
1661 }
1662
1663 static void cpu_unlink_tb(CPUArchState *env)
1664 {
1665 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1666 problem and hope the cpu will stop of its own accord. For userspace
1667 emulation this often isn't actually as bad as it sounds. Often
1668 signals are used primarily to interrupt blocking syscalls. */
1669 TranslationBlock *tb;
1670 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1671
1672 spin_lock(&interrupt_lock);
1673 tb = env->current_tb;
1674 /* if the cpu is currently executing code, we must unlink it and
1675 all the potentially executing TB */
1676 if (tb) {
1677 env->current_tb = NULL;
1678 tb_reset_jump_recursive(tb);
1679 }
1680 spin_unlock(&interrupt_lock);
1681 }
1682
1683 #ifndef CONFIG_USER_ONLY
1684 /* mask must never be zero, except for A20 change call */
1685 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1686 {
1687 int old_mask;
1688
1689 old_mask = env->interrupt_request;
1690 env->interrupt_request |= mask;
1691
1692 /*
1693 * If called from iothread context, wake the target cpu in
1694 * case its halted.
1695 */
1696 if (!qemu_cpu_is_self(env)) {
1697 qemu_cpu_kick(env);
1698 return;
1699 }
1700
1701 if (use_icount) {
1702 env->icount_decr.u16.high = 0xffff;
1703 if (!can_do_io(env)
1704 && (mask & ~old_mask) != 0) {
1705 cpu_abort(env, "Raised interrupt while not in I/O function");
1706 }
1707 } else {
1708 cpu_unlink_tb(env);
1709 }
1710 }
1711
1712 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1713
1714 #else /* CONFIG_USER_ONLY */
1715
1716 void cpu_interrupt(CPUArchState *env, int mask)
1717 {
1718 env->interrupt_request |= mask;
1719 cpu_unlink_tb(env);
1720 }
1721 #endif /* CONFIG_USER_ONLY */
1722
1723 void cpu_reset_interrupt(CPUArchState *env, int mask)
1724 {
1725 env->interrupt_request &= ~mask;
1726 }
1727
1728 void cpu_exit(CPUArchState *env)
1729 {
1730 env->exit_request = 1;
1731 cpu_unlink_tb(env);
1732 }
1733
1734 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1735 {
1736 va_list ap;
1737 va_list ap2;
1738
1739 va_start(ap, fmt);
1740 va_copy(ap2, ap);
1741 fprintf(stderr, "qemu: fatal: ");
1742 vfprintf(stderr, fmt, ap);
1743 fprintf(stderr, "\n");
1744 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1745 if (qemu_log_enabled()) {
1746 qemu_log("qemu: fatal: ");
1747 qemu_log_vprintf(fmt, ap2);
1748 qemu_log("\n");
1749 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1750 qemu_log_flush();
1751 qemu_log_close();
1752 }
1753 va_end(ap2);
1754 va_end(ap);
1755 #if defined(CONFIG_USER_ONLY)
1756 {
1757 struct sigaction act;
1758 sigfillset(&act.sa_mask);
1759 act.sa_handler = SIG_DFL;
1760 sigaction(SIGABRT, &act, NULL);
1761 }
1762 #endif
1763 abort();
1764 }
1765
1766 CPUArchState *cpu_copy(CPUArchState *env)
1767 {
1768 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1769 CPUArchState *next_cpu = new_env->next_cpu;
1770 int cpu_index = new_env->cpu_index;
1771 #if defined(TARGET_HAS_ICE)
1772 CPUBreakpoint *bp;
1773 CPUWatchpoint *wp;
1774 #endif
1775
1776 memcpy(new_env, env, sizeof(CPUArchState));
1777
1778 /* Preserve chaining and index. */
1779 new_env->next_cpu = next_cpu;
1780 new_env->cpu_index = cpu_index;
1781
1782 /* Clone all break/watchpoints.
1783 Note: Once we support ptrace with hw-debug register access, make sure
1784 BP_CPU break/watchpoints are handled correctly on clone. */
1785 QTAILQ_INIT(&env->breakpoints);
1786 QTAILQ_INIT(&env->watchpoints);
1787 #if defined(TARGET_HAS_ICE)
1788 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1789 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1790 }
1791 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1792 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1793 wp->flags, NULL);
1794 }
1795 #endif
1796
1797 return new_env;
1798 }
1799
1800 #if !defined(CONFIG_USER_ONLY)
1801 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1802 {
1803 unsigned int i;
1804
1805 /* Discard jump cache entries for any tb which might potentially
1806 overlap the flushed page. */
1807 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1808 memset (&env->tb_jmp_cache[i], 0,
1809 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1810
1811 i = tb_jmp_cache_hash_page(addr);
1812 memset (&env->tb_jmp_cache[i], 0,
1813 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1814 }
1815
1816 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1817 uintptr_t length)
1818 {
1819 uintptr_t start1;
1820
1821 /* we modify the TLB cache so that the dirty bit will be set again
1822 when accessing the range */
1823 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1824 /* Check that we don't span multiple blocks - this breaks the
1825 address comparisons below. */
1826 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1827 != (end - 1) - start) {
1828 abort();
1829 }
1830 cpu_tlb_reset_dirty_all(start1, length);
1831
1832 }
1833
1834 /* Note: start and end must be within the same ram block. */
1835 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1836 int dirty_flags)
1837 {
1838 uintptr_t length;
1839
1840 start &= TARGET_PAGE_MASK;
1841 end = TARGET_PAGE_ALIGN(end);
1842
1843 length = end - start;
1844 if (length == 0)
1845 return;
1846 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1847
1848 if (tcg_enabled()) {
1849 tlb_reset_dirty_range_all(start, end, length);
1850 }
1851 }
1852
1853 int cpu_physical_memory_set_dirty_tracking(int enable)
1854 {
1855 int ret = 0;
1856 in_migration = enable;
1857 return ret;
1858 }
1859
1860 target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
1861 MemoryRegionSection *section,
1862 target_ulong vaddr,
1863 target_phys_addr_t paddr,
1864 int prot,
1865 target_ulong *address)
1866 {
1867 target_phys_addr_t iotlb;
1868 CPUWatchpoint *wp;
1869
1870 if (memory_region_is_ram(section->mr)) {
1871 /* Normal RAM. */
1872 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1873 + memory_region_section_addr(section, paddr);
1874 if (!section->readonly) {
1875 iotlb |= phys_section_notdirty;
1876 } else {
1877 iotlb |= phys_section_rom;
1878 }
1879 } else {
1880 /* IO handlers are currently passed a physical address.
1881 It would be nice to pass an offset from the base address
1882 of that region. This would avoid having to special case RAM,
1883 and avoid full address decoding in every device.
1884 We can't use the high bits of pd for this because
1885 IO_MEM_ROMD uses these as a ram address. */
1886 iotlb = section - phys_sections;
1887 iotlb += memory_region_section_addr(section, paddr);
1888 }
1889
1890 /* Make accesses to pages with watchpoints go via the
1891 watchpoint trap routines. */
1892 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1893 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1894 /* Avoid trapping reads of pages with a write breakpoint. */
1895 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1896 iotlb = phys_section_watch + paddr;
1897 *address |= TLB_MMIO;
1898 break;
1899 }
1900 }
1901 }
1902
1903 return iotlb;
1904 }
1905
1906 #else
1907 /*
1908 * Walks guest process memory "regions" one by one
1909 * and calls callback function 'fn' for each region.
1910 */
1911
1912 struct walk_memory_regions_data
1913 {
1914 walk_memory_regions_fn fn;
1915 void *priv;
1916 uintptr_t start;
1917 int prot;
1918 };
1919
1920 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1921 abi_ulong end, int new_prot)
1922 {
1923 if (data->start != -1ul) {
1924 int rc = data->fn(data->priv, data->start, end, data->prot);
1925 if (rc != 0) {
1926 return rc;
1927 }
1928 }
1929
1930 data->start = (new_prot ? end : -1ul);
1931 data->prot = new_prot;
1932
1933 return 0;
1934 }
1935
1936 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1937 abi_ulong base, int level, void **lp)
1938 {
1939 abi_ulong pa;
1940 int i, rc;
1941
1942 if (*lp == NULL) {
1943 return walk_memory_regions_end(data, base, 0);
1944 }
1945
1946 if (level == 0) {
1947 PageDesc *pd = *lp;
1948 for (i = 0; i < L2_SIZE; ++i) {
1949 int prot = pd[i].flags;
1950
1951 pa = base | (i << TARGET_PAGE_BITS);
1952 if (prot != data->prot) {
1953 rc = walk_memory_regions_end(data, pa, prot);
1954 if (rc != 0) {
1955 return rc;
1956 }
1957 }
1958 }
1959 } else {
1960 void **pp = *lp;
1961 for (i = 0; i < L2_SIZE; ++i) {
1962 pa = base | ((abi_ulong)i <<
1963 (TARGET_PAGE_BITS + L2_BITS * level));
1964 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1965 if (rc != 0) {
1966 return rc;
1967 }
1968 }
1969 }
1970
1971 return 0;
1972 }
1973
1974 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1975 {
1976 struct walk_memory_regions_data data;
1977 uintptr_t i;
1978
1979 data.fn = fn;
1980 data.priv = priv;
1981 data.start = -1ul;
1982 data.prot = 0;
1983
1984 for (i = 0; i < V_L1_SIZE; i++) {
1985 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1986 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
1987 if (rc != 0) {
1988 return rc;
1989 }
1990 }
1991
1992 return walk_memory_regions_end(&data, 0, 0);
1993 }
1994
1995 static int dump_region(void *priv, abi_ulong start,
1996 abi_ulong end, unsigned long prot)
1997 {
1998 FILE *f = (FILE *)priv;
1999
2000 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2001 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2002 start, end, end - start,
2003 ((prot & PAGE_READ) ? 'r' : '-'),
2004 ((prot & PAGE_WRITE) ? 'w' : '-'),
2005 ((prot & PAGE_EXEC) ? 'x' : '-'));
2006
2007 return (0);
2008 }
2009
2010 /* dump memory mappings */
2011 void page_dump(FILE *f)
2012 {
2013 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2014 "start", "end", "size", "prot");
2015 walk_memory_regions(f, dump_region);
2016 }
2017
2018 int page_get_flags(target_ulong address)
2019 {
2020 PageDesc *p;
2021
2022 p = page_find(address >> TARGET_PAGE_BITS);
2023 if (!p)
2024 return 0;
2025 return p->flags;
2026 }
2027
2028 /* Modify the flags of a page and invalidate the code if necessary.
2029 The flag PAGE_WRITE_ORG is positioned automatically depending
2030 on PAGE_WRITE. The mmap_lock should already be held. */
2031 void page_set_flags(target_ulong start, target_ulong end, int flags)
2032 {
2033 target_ulong addr, len;
2034
2035 /* This function should never be called with addresses outside the
2036 guest address space. If this assert fires, it probably indicates
2037 a missing call to h2g_valid. */
2038 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2039 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2040 #endif
2041 assert(start < end);
2042
2043 start = start & TARGET_PAGE_MASK;
2044 end = TARGET_PAGE_ALIGN(end);
2045
2046 if (flags & PAGE_WRITE) {
2047 flags |= PAGE_WRITE_ORG;
2048 }
2049
2050 for (addr = start, len = end - start;
2051 len != 0;
2052 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2053 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2054
2055 /* If the write protection bit is set, then we invalidate
2056 the code inside. */
2057 if (!(p->flags & PAGE_WRITE) &&
2058 (flags & PAGE_WRITE) &&
2059 p->first_tb) {
2060 tb_invalidate_phys_page(addr, 0, NULL);
2061 }
2062 p->flags = flags;
2063 }
2064 }
2065
2066 int page_check_range(target_ulong start, target_ulong len, int flags)
2067 {
2068 PageDesc *p;
2069 target_ulong end;
2070 target_ulong addr;
2071
2072 /* This function should never be called with addresses outside the
2073 guest address space. If this assert fires, it probably indicates
2074 a missing call to h2g_valid. */
2075 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2076 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2077 #endif
2078
2079 if (len == 0) {
2080 return 0;
2081 }
2082 if (start + len - 1 < start) {
2083 /* We've wrapped around. */
2084 return -1;
2085 }
2086
2087 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2088 start = start & TARGET_PAGE_MASK;
2089
2090 for (addr = start, len = end - start;
2091 len != 0;
2092 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2093 p = page_find(addr >> TARGET_PAGE_BITS);
2094 if( !p )
2095 return -1;
2096 if( !(p->flags & PAGE_VALID) )
2097 return -1;
2098
2099 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2100 return -1;
2101 if (flags & PAGE_WRITE) {
2102 if (!(p->flags & PAGE_WRITE_ORG))
2103 return -1;
2104 /* unprotect the page if it was put read-only because it
2105 contains translated code */
2106 if (!(p->flags & PAGE_WRITE)) {
2107 if (!page_unprotect(addr, 0, NULL))
2108 return -1;
2109 }
2110 return 0;
2111 }
2112 }
2113 return 0;
2114 }
2115
2116 /* called from signal handler: invalidate the code and unprotect the
2117 page. Return TRUE if the fault was successfully handled. */
2118 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2119 {
2120 unsigned int prot;
2121 PageDesc *p;
2122 target_ulong host_start, host_end, addr;
2123
2124 /* Technically this isn't safe inside a signal handler. However we
2125 know this only ever happens in a synchronous SEGV handler, so in
2126 practice it seems to be ok. */
2127 mmap_lock();
2128
2129 p = page_find(address >> TARGET_PAGE_BITS);
2130 if (!p) {
2131 mmap_unlock();
2132 return 0;
2133 }
2134
2135 /* if the page was really writable, then we change its
2136 protection back to writable */
2137 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2138 host_start = address & qemu_host_page_mask;
2139 host_end = host_start + qemu_host_page_size;
2140
2141 prot = 0;
2142 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2143 p = page_find(addr >> TARGET_PAGE_BITS);
2144 p->flags |= PAGE_WRITE;
2145 prot |= p->flags;
2146
2147 /* and since the content will be modified, we must invalidate
2148 the corresponding translated code. */
2149 tb_invalidate_phys_page(addr, pc, puc);
2150 #ifdef DEBUG_TB_CHECK
2151 tb_invalidate_check(addr);
2152 #endif
2153 }
2154 mprotect((void *)g2h(host_start), qemu_host_page_size,
2155 prot & PAGE_BITS);
2156
2157 mmap_unlock();
2158 return 1;
2159 }
2160 mmap_unlock();
2161 return 0;
2162 }
2163 #endif /* defined(CONFIG_USER_ONLY) */
2164
2165 #if !defined(CONFIG_USER_ONLY)
2166
2167 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2168 typedef struct subpage_t {
2169 MemoryRegion iomem;
2170 target_phys_addr_t base;
2171 uint16_t sub_section[TARGET_PAGE_SIZE];
2172 } subpage_t;
2173
2174 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2175 uint16_t section);
2176 static subpage_t *subpage_init(target_phys_addr_t base);
2177 static void destroy_page_desc(uint16_t section_index)
2178 {
2179 MemoryRegionSection *section = &phys_sections[section_index];
2180 MemoryRegion *mr = section->mr;
2181
2182 if (mr->subpage) {
2183 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2184 memory_region_destroy(&subpage->iomem);
2185 g_free(subpage);
2186 }
2187 }
2188
2189 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2190 {
2191 unsigned i;
2192 PhysPageEntry *p;
2193
2194 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2195 return;
2196 }
2197
2198 p = phys_map_nodes[lp->ptr];
2199 for (i = 0; i < L2_SIZE; ++i) {
2200 if (!p[i].is_leaf) {
2201 destroy_l2_mapping(&p[i], level - 1);
2202 } else {
2203 destroy_page_desc(p[i].ptr);
2204 }
2205 }
2206 lp->is_leaf = 0;
2207 lp->ptr = PHYS_MAP_NODE_NIL;
2208 }
2209
2210 static void destroy_all_mappings(void)
2211 {
2212 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2213 phys_map_nodes_reset();
2214 }
2215
2216 static uint16_t phys_section_add(MemoryRegionSection *section)
2217 {
2218 if (phys_sections_nb == phys_sections_nb_alloc) {
2219 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2220 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2221 phys_sections_nb_alloc);
2222 }
2223 phys_sections[phys_sections_nb] = *section;
2224 return phys_sections_nb++;
2225 }
2226
2227 static void phys_sections_clear(void)
2228 {
2229 phys_sections_nb = 0;
2230 }
2231
2232 static void register_subpage(MemoryRegionSection *section)
2233 {
2234 subpage_t *subpage;
2235 target_phys_addr_t base = section->offset_within_address_space
2236 & TARGET_PAGE_MASK;
2237 MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2238 MemoryRegionSection subsection = {
2239 .offset_within_address_space = base,
2240 .size = TARGET_PAGE_SIZE,
2241 };
2242 target_phys_addr_t start, end;
2243
2244 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2245
2246 if (!(existing->mr->subpage)) {
2247 subpage = subpage_init(base);
2248 subsection.mr = &subpage->iomem;
2249 phys_page_set(base >> TARGET_PAGE_BITS, 1,
2250 phys_section_add(&subsection));
2251 } else {
2252 subpage = container_of(existing->mr, subpage_t, iomem);
2253 }
2254 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2255 end = start + section->size - 1;
2256 subpage_register(subpage, start, end, phys_section_add(section));
2257 }
2258
2259
2260 static void register_multipage(MemoryRegionSection *section)
2261 {
2262 target_phys_addr_t start_addr = section->offset_within_address_space;
2263 ram_addr_t size = section->size;
2264 target_phys_addr_t addr;
2265 uint16_t section_index = phys_section_add(section);
2266
2267 assert(size);
2268
2269 addr = start_addr;
2270 phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2271 section_index);
2272 }
2273
2274 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2275 bool readonly)
2276 {
2277 MemoryRegionSection now = *section, remain = *section;
2278
2279 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2280 || (now.size < TARGET_PAGE_SIZE)) {
2281 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2282 - now.offset_within_address_space,
2283 now.size);
2284 register_subpage(&now);
2285 remain.size -= now.size;
2286 remain.offset_within_address_space += now.size;
2287 remain.offset_within_region += now.size;
2288 }
2289 while (remain.size >= TARGET_PAGE_SIZE) {
2290 now = remain;
2291 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2292 now.size = TARGET_PAGE_SIZE;
2293 register_subpage(&now);
2294 } else {
2295 now.size &= TARGET_PAGE_MASK;
2296 register_multipage(&now);
2297 }
2298 remain.size -= now.size;
2299 remain.offset_within_address_space += now.size;
2300 remain.offset_within_region += now.size;
2301 }
2302 now = remain;
2303 if (now.size) {
2304 register_subpage(&now);
2305 }
2306 }
2307
2308
2309 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2310 {
2311 if (kvm_enabled())
2312 kvm_coalesce_mmio_region(addr, size);
2313 }
2314
2315 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2316 {
2317 if (kvm_enabled())
2318 kvm_uncoalesce_mmio_region(addr, size);
2319 }
2320
/* Flush KVM's coalesced-MMIO buffer; a no-op without KVM. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2326
2327 #if defined(__linux__) && !defined(TARGET_S390X)
2328
2329 #include <sys/vfs.h>
2330
2331 #define HUGETLBFS_MAGIC 0x958458f6
2332
2333 static long gethugepagesize(const char *path)
2334 {
2335 struct statfs fs;
2336 int ret;
2337
2338 do {
2339 ret = statfs(path, &fs);
2340 } while (ret != 0 && errno == EINTR);
2341
2342 if (ret != 0) {
2343 perror(path);
2344 return 0;
2345 }
2346
2347 if (fs.f_type != HUGETLBFS_MAGIC)
2348 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2349
2350 return fs.f_bsize;
2351 }
2352
2353 static void *file_ram_alloc(RAMBlock *block,
2354 ram_addr_t memory,
2355 const char *path)
2356 {
2357 char *filename;
2358 void *area;
2359 int fd;
2360 #ifdef MAP_POPULATE
2361 int flags;
2362 #endif
2363 unsigned long hpagesize;
2364
2365 hpagesize = gethugepagesize(path);
2366 if (!hpagesize) {
2367 return NULL;
2368 }
2369
2370 if (memory < hpagesize) {
2371 return NULL;
2372 }
2373
2374 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2375 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2376 return NULL;
2377 }
2378
2379 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2380 return NULL;
2381 }
2382
2383 fd = mkstemp(filename);
2384 if (fd < 0) {
2385 perror("unable to create backing store for hugepages");
2386 free(filename);
2387 return NULL;
2388 }
2389 unlink(filename);
2390 free(filename);
2391
2392 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2393
2394 /*
2395 * ftruncate is not supported by hugetlbfs in older
2396 * hosts, so don't bother bailing out on errors.
2397 * If anything goes wrong with it under other filesystems,
2398 * mmap will fail.
2399 */
2400 if (ftruncate(fd, memory))
2401 perror("ftruncate");
2402
2403 #ifdef MAP_POPULATE
2404 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2405 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2406 * to sidestep this quirk.
2407 */
2408 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2409 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2410 #else
2411 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2412 #endif
2413 if (area == MAP_FAILED) {
2414 perror("file_ram_alloc: can't mmap RAM pages");
2415 close(fd);
2416 return (NULL);
2417 }
2418 block->fd = fd;
2419 return area;
2420 }
2421 #endif
2422
2423 static ram_addr_t find_ram_offset(ram_addr_t size)
2424 {
2425 RAMBlock *block, *next_block;
2426 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2427
2428 if (QLIST_EMPTY(&ram_list.blocks))
2429 return 0;
2430
2431 QLIST_FOREACH(block, &ram_list.blocks, next) {
2432 ram_addr_t end, next = RAM_ADDR_MAX;
2433
2434 end = block->offset + block->length;
2435
2436 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2437 if (next_block->offset >= end) {
2438 next = MIN(next, next_block->offset);
2439 }
2440 }
2441 if (next - end >= size && next - end < mingap) {
2442 offset = end;
2443 mingap = next - end;
2444 }
2445 }
2446
2447 if (offset == RAM_ADDR_MAX) {
2448 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2449 (uint64_t)size);
2450 abort();
2451 }
2452
2453 return offset;
2454 }
2455
2456 static ram_addr_t last_ram_offset(void)
2457 {
2458 RAMBlock *block;
2459 ram_addr_t last = 0;
2460
2461 QLIST_FOREACH(block, &ram_list.blocks, next)
2462 last = MAX(last, block->offset + block->length);
2463
2464 return last;
2465 }
2466
2467 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2468 {
2469 int ret;
2470 QemuOpts *machine_opts;
2471
2472 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
2473 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2474 if (machine_opts &&
2475 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2476 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2477 if (ret) {
2478 perror("qemu_madvise");
2479 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2480 "but dump_guest_core=off specified\n");
2481 }
2482 }
2483 }
2484
2485 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2486 {
2487 RAMBlock *new_block, *block;
2488
2489 new_block = NULL;
2490 QLIST_FOREACH(block, &ram_list.blocks, next) {
2491 if (block->offset == addr) {
2492 new_block = block;
2493 break;
2494 }
2495 }
2496 assert(new_block);
2497 assert(!new_block->idstr[0]);
2498
2499 if (dev) {
2500 char *id = qdev_get_dev_path(dev);
2501 if (id) {
2502 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2503 g_free(id);
2504 }
2505 }
2506 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2507
2508 QLIST_FOREACH(block, &ram_list.blocks, next) {
2509 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2510 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2511 new_block->idstr);
2512 abort();
2513 }
2514 }
2515 }
2516
2517 static int memory_try_enable_merging(void *addr, size_t len)
2518 {
2519 QemuOpts *opts;
2520
2521 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2522 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2523 /* disabled by the user */
2524 return 0;
2525 }
2526
2527 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2528 }
2529
2530 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2531 MemoryRegion *mr)
2532 {
2533 RAMBlock *new_block;
2534
2535 size = TARGET_PAGE_ALIGN(size);
2536 new_block = g_malloc0(sizeof(*new_block));
2537
2538 new_block->mr = mr;
2539 new_block->offset = find_ram_offset(size);
2540 if (host) {
2541 new_block->host = host;
2542 new_block->flags |= RAM_PREALLOC_MASK;
2543 } else {
2544 if (mem_path) {
2545 #if defined (__linux__) && !defined(TARGET_S390X)
2546 new_block->host = file_ram_alloc(new_block, size, mem_path);
2547 if (!new_block->host) {
2548 new_block->host = qemu_vmalloc(size);
2549 memory_try_enable_merging(new_block->host, size);
2550 }
2551 #else
2552 fprintf(stderr, "-mem-path option unsupported\n");
2553 exit(1);
2554 #endif
2555 } else {
2556 if (xen_enabled()) {
2557 xen_ram_alloc(new_block->offset, size, mr);
2558 } else if (kvm_enabled()) {
2559 /* some s390/kvm configurations have special constraints */
2560 new_block->host = kvm_vmalloc(size);
2561 } else {
2562 new_block->host = qemu_vmalloc(size);
2563 }
2564 memory_try_enable_merging(new_block->host, size);
2565 }
2566 }
2567 new_block->length = size;
2568
2569 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2570
2571 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2572 last_ram_offset() >> TARGET_PAGE_BITS);
2573 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2574 0, size >> TARGET_PAGE_BITS);
2575 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2576
2577 qemu_ram_setup_dump(new_block->host, size);
2578
2579 if (kvm_enabled())
2580 kvm_setup_guest_memory(new_block->host, size);
2581
2582 return new_block->offset;
2583 }
2584
2585 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2586 {
2587 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2588 }
2589
2590 void qemu_ram_free_from_ptr(ram_addr_t addr)
2591 {
2592 RAMBlock *block;
2593
2594 QLIST_FOREACH(block, &ram_list.blocks, next) {
2595 if (addr == block->offset) {
2596 QLIST_REMOVE(block, next);
2597 g_free(block);
2598 return;
2599 }
2600 }
2601 }
2602
2603 void qemu_ram_free(ram_addr_t addr)
2604 {
2605 RAMBlock *block;
2606
2607 QLIST_FOREACH(block, &ram_list.blocks, next) {
2608 if (addr == block->offset) {
2609 QLIST_REMOVE(block, next);
2610 if (block->flags & RAM_PREALLOC_MASK) {
2611 ;
2612 } else if (mem_path) {
2613 #if defined (__linux__) && !defined(TARGET_S390X)
2614 if (block->fd) {
2615 munmap(block->host, block->length);
2616 close(block->fd);
2617 } else {
2618 qemu_vfree(block->host);
2619 }
2620 #else
2621 abort();
2622 #endif
2623 } else {
2624 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2625 munmap(block->host, block->length);
2626 #else
2627 if (xen_enabled()) {
2628 xen_invalidate_map_cache_entry(block->host);
2629 } else {
2630 qemu_vfree(block->host);
2631 }
2632 #endif
2633 }
2634 g_free(block);
2635 return;
2636 }
2637 }
2638
2639 }
2640
2641 #ifndef _WIN32
2642 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2643 {
2644 RAMBlock *block;
2645 ram_addr_t offset;
2646 int flags;
2647 void *area, *vaddr;
2648
2649 QLIST_FOREACH(block, &ram_list.blocks, next) {
2650 offset = addr - block->offset;
2651 if (offset < block->length) {
2652 vaddr = block->host + offset;
2653 if (block->flags & RAM_PREALLOC_MASK) {
2654 ;
2655 } else {
2656 flags = MAP_FIXED;
2657 munmap(vaddr, length);
2658 if (mem_path) {
2659 #if defined(__linux__) && !defined(TARGET_S390X)
2660 if (block->fd) {
2661 #ifdef MAP_POPULATE
2662 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2663 MAP_PRIVATE;
2664 #else
2665 flags |= MAP_PRIVATE;
2666 #endif
2667 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2668 flags, block->fd, offset);
2669 } else {
2670 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2671 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2672 flags, -1, 0);
2673 }
2674 #else
2675 abort();
2676 #endif
2677 } else {
2678 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2679 flags |= MAP_SHARED | MAP_ANONYMOUS;
2680 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2681 flags, -1, 0);
2682 #else
2683 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2684 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2685 flags, -1, 0);
2686 #endif
2687 }
2688 if (area != vaddr) {
2689 fprintf(stderr, "Could not remap addr: "
2690 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2691 length, addr);
2692 exit(1);
2693 }
2694 memory_try_enable_merging(vaddr, length);
2695 qemu_ram_setup_dump(vaddr, length);
2696 }
2697 return;
2698 }
2699 }
2700 }
2701 #endif /* !_WIN32 */
2702
2703 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2704 With the exception of the softmmu code in this file, this should
2705 only be used for local memory (e.g. video ram) that the device owns,
2706 and knows it isn't going to access beyond the end of the block.
2707
2708 It should not be used for general purpose DMA.
2709 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2710 */
2711 void *qemu_get_ram_ptr(ram_addr_t addr)
2712 {
2713 RAMBlock *block;
2714
2715 QLIST_FOREACH(block, &ram_list.blocks, next) {
2716 if (addr - block->offset < block->length) {
2717 /* Move this entry to to start of the list. */
2718 if (block != QLIST_FIRST(&ram_list.blocks)) {
2719 QLIST_REMOVE(block, next);
2720 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2721 }
2722 if (xen_enabled()) {
2723 /* We need to check if the requested address is in the RAM
2724 * because we don't want to map the entire memory in QEMU.
2725 * In that case just map until the end of the page.
2726 */
2727 if (block->offset == 0) {
2728 return xen_map_cache(addr, 0, 0);
2729 } else if (block->host == NULL) {
2730 block->host =
2731 xen_map_cache(block->offset, block->length, 1);
2732 }
2733 }
2734 return block->host + (addr - block->offset);
2735 }
2736 }
2737
2738 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2739 abort();
2740
2741 return NULL;
2742 }
2743
2744 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2745 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2746 */
2747 void *qemu_safe_ram_ptr(ram_addr_t addr)
2748 {
2749 RAMBlock *block;
2750
2751 QLIST_FOREACH(block, &ram_list.blocks, next) {
2752 if (addr - block->offset < block->length) {
2753 if (xen_enabled()) {
2754 /* We need to check if the requested address is in the RAM
2755 * because we don't want to map the entire memory in QEMU.
2756 * In that case just map until the end of the page.
2757 */
2758 if (block->offset == 0) {
2759 return xen_map_cache(addr, 0, 0);
2760 } else if (block->host == NULL) {
2761 block->host =
2762 xen_map_cache(block->offset, block->length, 1);
2763 }
2764 }
2765 return block->host + (addr - block->offset);
2766 }
2767 }
2768
2769 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2770 abort();
2771
2772 return NULL;
2773 }
2774
2775 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2776 * but takes a size argument */
2777 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2778 {
2779 if (*size == 0) {
2780 return NULL;
2781 }
2782 if (xen_enabled()) {
2783 return xen_map_cache(addr, *size, 1);
2784 } else {
2785 RAMBlock *block;
2786
2787 QLIST_FOREACH(block, &ram_list.blocks, next) {
2788 if (addr - block->offset < block->length) {
2789 if (addr - block->offset + *size > block->length)
2790 *size = block->length - addr + block->offset;
2791 return block->host + (addr - block->offset);
2792 }
2793 }
2794
2795 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2796 abort();
2797 }
2798 }
2799
/* Counterpart of qemu_get_ram_ptr; only emits a trace event. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
2804
2805 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2806 {
2807 RAMBlock *block;
2808 uint8_t *host = ptr;
2809
2810 if (xen_enabled()) {
2811 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2812 return 0;
2813 }
2814
2815 QLIST_FOREACH(block, &ram_list.blocks, next) {
2816 /* This case append when the block is not mapped. */
2817 if (block->host == NULL) {
2818 continue;
2819 }
2820 if (host - block->host < block->length) {
2821 *ram_addr = block->offset + (host - block->host);
2822 return 0;
2823 }
2824 }
2825
2826 return -1;
2827 }
2828
2829 /* Some of the softmmu routines need to translate from a host pointer
2830 (typically a TLB entry) back to a ram offset. */
2831 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2832 {
2833 ram_addr_t ram_addr;
2834
2835 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2836 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2837 abort();
2838 }
2839 return ram_addr;
2840 }
2841
2842 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
2843 unsigned size)
2844 {
2845 #ifdef DEBUG_UNASSIGNED
2846 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2847 #endif
2848 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2849 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2850 #endif
2851 return 0;
2852 }
2853
2854 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
2855 uint64_t val, unsigned size)
2856 {
2857 #ifdef DEBUG_UNASSIGNED
2858 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2859 #endif
2860 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2861 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2862 #endif
2863 }
2864
2865 static const MemoryRegionOps unassigned_mem_ops = {
2866 .read = unassigned_mem_read,
2867 .write = unassigned_mem_write,
2868 .endianness = DEVICE_NATIVE_ENDIAN,
2869 };
2870
2871 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
2872 unsigned size)
2873 {
2874 abort();
2875 }
2876
2877 static void error_mem_write(void *opaque, target_phys_addr_t addr,
2878 uint64_t value, unsigned size)
2879 {
2880 abort();
2881 }
2882
2883 static const MemoryRegionOps error_mem_ops = {
2884 .read = error_mem_read,
2885 .write = error_mem_write,
2886 .endianness = DEVICE_NATIVE_ENDIAN,
2887 };
2888
2889 static const MemoryRegionOps rom_mem_ops = {
2890 .read = error_mem_read,
2891 .write = unassigned_mem_write,
2892 .endianness = DEVICE_NATIVE_ENDIAN,
2893 };
2894
2895 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
2896 uint64_t val, unsigned size)
2897 {
2898 int dirty_flags;
2899 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2900 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2901 #if !defined(CONFIG_USER_ONLY)
2902 tb_invalidate_phys_page_fast(ram_addr, size);
2903 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2904 #endif
2905 }
2906 switch (size) {
2907 case 1:
2908 stb_p(qemu_get_ram_ptr(ram_addr), val);
2909 break;
2910 case 2:
2911 stw_p(qemu_get_ram_ptr(ram_addr), val);
2912 break;
2913 case 4:
2914 stl_p(qemu_get_ram_ptr(ram_addr), val);
2915 break;
2916 default:
2917 abort();
2918 }
2919 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2920 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2921 /* we remove the notdirty callback only if the code has been
2922 flushed */
2923 if (dirty_flags == 0xff)
2924 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2925 }
2926
2927 static const MemoryRegionOps notdirty_mem_ops = {
2928 .read = error_mem_read,
2929 .write = notdirty_mem_write,
2930 .endianness = DEVICE_NATIVE_ENDIAN,
2931 };
2932
2933 /* Generate a debug exception if a watchpoint has been hit. */
2934 static void check_watchpoint(int offset, int len_mask, int flags)
2935 {
2936 CPUArchState *env = cpu_single_env;
2937 target_ulong pc, cs_base;
2938 TranslationBlock *tb;
2939 target_ulong vaddr;
2940 CPUWatchpoint *wp;
2941 int cpu_flags;
2942
2943 if (env->watchpoint_hit) {
2944 /* We re-entered the check after replacing the TB. Now raise
2945 * the debug interrupt so that is will trigger after the
2946 * current instruction. */
2947 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2948 return;
2949 }
2950 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2951 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2952 if ((vaddr == (wp->vaddr & len_mask) ||
2953 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2954 wp->flags |= BP_WATCHPOINT_HIT;
2955 if (!env->watchpoint_hit) {
2956 env->watchpoint_hit = wp;
2957 tb = tb_find_pc(env->mem_io_pc);
2958 if (!tb) {
2959 cpu_abort(env, "check_watchpoint: could not find TB for "
2960 "pc=%p", (void *)env->mem_io_pc);
2961 }
2962 cpu_restore_state(tb, env, env->mem_io_pc);
2963 tb_phys_invalidate(tb, -1);
2964 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2965 env->exception_index = EXCP_DEBUG;
2966 cpu_loop_exit(env);
2967 } else {
2968 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2969 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2970 cpu_resume_from_signal(env, NULL);
2971 }
2972 }
2973 } else {
2974 wp->flags &= ~BP_WATCHPOINT_HIT;
2975 }
2976 }
2977 }
2978
2979 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2980 so these check for a hit then pass through to the normal out-of-line
2981 phys routines. */
2982 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
2983 unsigned size)
2984 {
2985 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2986 switch (size) {
2987 case 1: return ldub_phys(addr);
2988 case 2: return lduw_phys(addr);
2989 case 4: return ldl_phys(addr);
2990 default: abort();
2991 }
2992 }
2993
2994 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
2995 uint64_t val, unsigned size)
2996 {
2997 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2998 switch (size) {
2999 case 1:
3000 stb_phys(addr, val);
3001 break;
3002 case 2:
3003 stw_phys(addr, val);
3004 break;
3005 case 4:
3006 stl_phys(addr, val);
3007 break;
3008 default: abort();
3009 }
3010 }
3011
3012 static const MemoryRegionOps watch_mem_ops = {
3013 .read = watch_mem_read,
3014 .write = watch_mem_write,
3015 .endianness = DEVICE_NATIVE_ENDIAN,
3016 };
3017
3018 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3019 unsigned len)
3020 {
3021 subpage_t *mmio = opaque;
3022 unsigned int idx = SUBPAGE_IDX(addr);
3023 MemoryRegionSection *section;
3024 #if defined(DEBUG_SUBPAGE)
3025 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3026 mmio, len, addr, idx);
3027 #endif
3028
3029 section = &phys_sections[mmio->sub_section[idx]];
3030 addr += mmio->base;
3031 addr -= section->offset_within_address_space;
3032 addr += section->offset_within_region;
3033 return io_mem_read(section->mr, addr, len);
3034 }
3035
3036 static void subpage_write(void *opaque, target_phys_addr_t addr,
3037 uint64_t value, unsigned len)
3038 {
3039 subpage_t *mmio = opaque;
3040 unsigned int idx = SUBPAGE_IDX(addr);
3041 MemoryRegionSection *section;
3042 #if defined(DEBUG_SUBPAGE)
3043 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3044 " idx %d value %"PRIx64"\n",
3045 __func__, mmio, len, addr, idx, value);
3046 #endif
3047
3048 section = &phys_sections[mmio->sub_section[idx]];
3049 addr += mmio->base;
3050 addr -= section->offset_within_address_space;
3051 addr += section->offset_within_region;
3052 io_mem_write(section->mr, addr, value, len);
3053 }
3054
3055 static const MemoryRegionOps subpage_ops = {
3056 .read = subpage_read,
3057 .write = subpage_write,
3058 .endianness = DEVICE_NATIVE_ENDIAN,
3059 };
3060
3061 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3062 unsigned size)
3063 {
3064 ram_addr_t raddr = addr;
3065 void *ptr = qemu_get_ram_ptr(raddr);
3066 switch (size) {
3067 case 1: return ldub_p(ptr);
3068 case 2: return lduw_p(ptr);
3069 case 4: return ldl_p(ptr);
3070 default: abort();
3071 }
3072 }
3073
3074 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3075 uint64_t value, unsigned size)
3076 {
3077 ram_addr_t raddr = addr;
3078 void *ptr = qemu_get_ram_ptr(raddr);
3079 switch (size) {
3080 case 1: return stb_p(ptr, value);
3081 case 2: return stw_p(ptr, value);
3082 case 4: return stl_p(ptr, value);
3083 default: abort();
3084 }
3085 }
3086
3087 static const MemoryRegionOps subpage_ram_ops = {
3088 .read = subpage_ram_read,
3089 .write = subpage_ram_write,
3090 .endianness = DEVICE_NATIVE_ENDIAN,
3091 };
3092
3093 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3094 uint16_t section)
3095 {
3096 int idx, eidx;
3097
3098 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3099 return -1;
3100 idx = SUBPAGE_IDX(start);
3101 eidx = SUBPAGE_IDX(end);
3102 #if defined(DEBUG_SUBPAGE)
3103 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3104 mmio, start, end, idx, eidx, memory);
3105 #endif
3106 if (memory_region_is_ram(phys_sections[section].mr)) {
3107 MemoryRegionSection new_section = phys_sections[section];
3108 new_section.mr = &io_mem_subpage_ram;
3109 section = phys_section_add(&new_section);
3110 }
3111 for (; idx <= eidx; idx++) {
3112 mmio->sub_section[idx] = section;
3113 }
3114
3115 return 0;
3116 }
3117
3118 static subpage_t *subpage_init(target_phys_addr_t base)
3119 {
3120 subpage_t *mmio;
3121
3122 mmio = g_malloc0(sizeof(subpage_t));
3123
3124 mmio->base = base;
3125 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3126 "subpage", TARGET_PAGE_SIZE);
3127 mmio->iomem.subpage = true;
3128 #if defined(DEBUG_SUBPAGE)
3129 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3130 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3131 #endif
3132 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3133
3134 return mmio;
3135 }
3136
3137 static uint16_t dummy_section(MemoryRegion *mr)
3138 {
3139 MemoryRegionSection section = {
3140 .mr = mr,
3141 .offset_within_address_space = 0,
3142 .offset_within_region = 0,
3143 .size = UINT64_MAX,
3144 };
3145
3146 return phys_section_add(&section);
3147 }
3148
3149 MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3150 {
3151 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3152 }
3153
3154 static void io_mem_init(void)
3155 {
3156 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3157 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3158 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3159 "unassigned", UINT64_MAX);
3160 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3161 "notdirty", UINT64_MAX);
3162 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3163 "subpage-ram", UINT64_MAX);
3164 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3165 "watch", UINT64_MAX);
3166 }
3167
3168 static void core_begin(MemoryListener *listener)
3169 {
3170 destroy_all_mappings();
3171 phys_sections_clear();
3172 phys_map.ptr = PHYS_MAP_NODE_NIL;
3173 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3174 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3175 phys_section_rom = dummy_section(&io_mem_rom);
3176 phys_section_watch = dummy_section(&io_mem_watch);
3177 }
3178
3179 static void core_commit(MemoryListener *listener)
3180 {
3181 CPUArchState *env;
3182
3183 /* since each CPU stores ram addresses in its TLB cache, we must
3184 reset the modified entries */
3185 /* XXX: slow ! */
3186 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3187 tlb_flush(env, 1);
3188 }
3189 }
3190
3191 static void core_region_add(MemoryListener *listener,
3192 MemoryRegionSection *section)
3193 {
3194 cpu_register_physical_memory_log(section, section->readonly);
3195 }
3196
3197 static void core_region_del(MemoryListener *listener,
3198 MemoryRegionSection *section)
3199 {
3200 }
3201
3202 static void core_region_nop(MemoryListener *listener,
3203 MemoryRegionSection *section)
3204 {
3205 cpu_register_physical_memory_log(section, section->readonly);
3206 }
3207
3208 static void core_log_start(MemoryListener *listener,
3209 MemoryRegionSection *section)
3210 {
3211 }
3212
3213 static void core_log_stop(MemoryListener *listener,
3214 MemoryRegionSection *section)
3215 {
3216 }
3217
3218 static void core_log_sync(MemoryListener *listener,
3219 MemoryRegionSection *section)
3220 {
3221 }
3222
3223 static void core_log_global_start(MemoryListener *listener)
3224 {
3225 cpu_physical_memory_set_dirty_tracking(1);
3226 }
3227
3228 static void core_log_global_stop(MemoryListener *listener)
3229 {
3230 cpu_physical_memory_set_dirty_tracking(0);
3231 }
3232
3233 static void core_eventfd_add(MemoryListener *listener,
3234 MemoryRegionSection *section,
3235 bool match_data, uint64_t data, EventNotifier *e)
3236 {
3237 }
3238
3239 static void core_eventfd_del(MemoryListener *listener,
3240 MemoryRegionSection *section,
3241 bool match_data, uint64_t data, EventNotifier *e)
3242 {
3243 }
3244
3245 static void io_begin(MemoryListener *listener)
3246 {
3247 }
3248
3249 static void io_commit(MemoryListener *listener)
3250 {
3251 }
3252
3253 static void io_region_add(MemoryListener *listener,
3254 MemoryRegionSection *section)
3255 {
3256 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3257
3258 mrio->mr = section->mr;
3259 mrio->offset = section->offset_within_region;
3260 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3261 section->offset_within_address_space, section->size);
3262 ioport_register(&mrio->iorange);
3263 }
3264
3265 static void io_region_del(MemoryListener *listener,
3266 MemoryRegionSection *section)
3267 {
3268 isa_unassign_ioport(section->offset_within_address_space, section->size);
3269 }
3270
3271 static void io_region_nop(MemoryListener *listener,
3272 MemoryRegionSection *section)
3273 {
3274 }
3275
3276 static void io_log_start(MemoryListener *listener,
3277 MemoryRegionSection *section)
3278 {
3279 }
3280
3281 static void io_log_stop(MemoryListener *listener,
3282 MemoryRegionSection *section)
3283 {
3284 }
3285
3286 static void io_log_sync(MemoryListener *listener,
3287 MemoryRegionSection *section)
3288 {
3289 }
3290
3291 static void io_log_global_start(MemoryListener *listener)
3292 {
3293 }
3294
3295 static void io_log_global_stop(MemoryListener *listener)
3296 {
3297 }
3298
3299 static void io_eventfd_add(MemoryListener *listener,
3300 MemoryRegionSection *section,
3301 bool match_data, uint64_t data, EventNotifier *e)
3302 {
3303 }
3304
3305 static void io_eventfd_del(MemoryListener *listener,
3306 MemoryRegionSection *section,
3307 bool match_data, uint64_t data, EventNotifier *e)
3308 {
3309 }
3310
3311 static MemoryListener core_memory_listener = {
3312 .begin = core_begin,
3313 .commit = core_commit,
3314 .region_add = core_region_add,
3315 .region_del = core_region_del,
3316 .region_nop = core_region_nop,
3317 .log_start = core_log_start,
3318 .log_stop = core_log_stop,
3319 .log_sync = core_log_sync,
3320 .log_global_start = core_log_global_start,
3321 .log_global_stop = core_log_global_stop,
3322 .eventfd_add = core_eventfd_add,
3323 .eventfd_del = core_eventfd_del,
3324 .priority = 0,
3325 };
3326
3327 static MemoryListener io_memory_listener = {
3328 .begin = io_begin,
3329 .commit = io_commit,
3330 .region_add = io_region_add,
3331 .region_del = io_region_del,
3332 .region_nop = io_region_nop,
3333 .log_start = io_log_start,
3334 .log_stop = io_log_stop,
3335 .log_sync = io_log_sync,
3336 .log_global_start = io_log_global_start,
3337 .log_global_stop = io_log_global_stop,
3338 .eventfd_add = io_eventfd_add,
3339 .eventfd_del = io_eventfd_del,
3340 .priority = 0,
3341 };
3342
3343 static void memory_map_init(void)
3344 {
3345 system_memory = g_malloc(sizeof(*system_memory));
3346 memory_region_init(system_memory, "system", INT64_MAX);
3347 set_system_memory_map(system_memory);
3348
3349 system_io = g_malloc(sizeof(*system_io));
3350 memory_region_init(system_io, "io", 65536);
3351 set_system_io_map(system_io);
3352
3353 memory_listener_register(&core_memory_listener, system_memory);
3354 memory_listener_register(&io_memory_listener, system_io);
3355 }
3356
3357 MemoryRegion *get_system_memory(void)
3358 {
3359 return system_memory;
3360 }
3361
/* Return the root "io" memory region allocated by memory_map_init(). */
MemoryRegion *get_system_io(void)
{
    return system_io;
}
3366
3367 #endif /* !defined(CONFIG_USER_ONLY) */
3368
3369 /* physical memory access (slow version, mainly for debug) */
3370 #if defined(CONFIG_USER_ONLY)
3371 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3372 uint8_t *buf, int len, int is_write)
3373 {
3374 int l, flags;
3375 target_ulong page;
3376 void * p;
3377
3378 while (len > 0) {
3379 page = addr & TARGET_PAGE_MASK;
3380 l = (page + TARGET_PAGE_SIZE) - addr;
3381 if (l > len)
3382 l = len;
3383 flags = page_get_flags(page);
3384 if (!(flags & PAGE_VALID))
3385 return -1;
3386 if (is_write) {
3387 if (!(flags & PAGE_WRITE))
3388 return -1;
3389 /* XXX: this code should not depend on lock_user */
3390 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3391 return -1;
3392 memcpy(p, buf, l);
3393 unlock_user(p, addr, l);
3394 } else {
3395 if (!(flags & PAGE_READ))
3396 return -1;
3397 /* XXX: this code should not depend on lock_user */
3398 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3399 return -1;
3400 memcpy(buf, p, l);
3401 unlock_user(p, addr, 0);
3402 }
3403 len -= l;
3404 buf += l;
3405 addr += l;
3406 }
3407 return 0;
3408 }
3409
3410 #else
3411
3412 static void invalidate_and_set_dirty(target_phys_addr_t addr,
3413 target_phys_addr_t length)
3414 {
3415 if (!cpu_physical_memory_is_dirty(addr)) {
3416 /* invalidate code */
3417 tb_invalidate_phys_page_range(addr, addr + length, 0);
3418 /* set dirty bit */
3419 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3420 }
3421 xen_modified_memory(addr, length);
3422 }
3423
/*
 * Copy len bytes between buf and guest physical memory starting at addr.
 * is_write != 0 copies buf -> guest, otherwise guest -> buf.
 *
 * The transfer is chopped so that no step crosses a target page boundary.
 * For each step the page's MemoryRegionSection decides the path:
 *  - I/O regions are accessed through io_mem_read()/io_mem_write() in the
 *    widest naturally aligned unit (4, 2 or 1 bytes) that fits;
 *  - RAM is accessed with memcpy() through the host pointer; writes also
 *    go through invalidate_and_set_dirty().
 * Writes that hit a RAM-backed section marked readonly are dropped
 * without any error indication.
 */
void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                            int len, int is_write)
{
    int l;
    uint8_t *ptr;
    uint32_t val;
    target_phys_addr_t page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        /* bytes remaining in this page, clamped to the request */
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (is_write) {
            if (!memory_region_is_ram(section->mr)) {
                target_phys_addr_t addr1;
                addr1 = memory_region_section_addr(section, addr);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    io_mem_write(section->mr, addr1, val, 4);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    io_mem_write(section->mr, addr1, val, 2);
                    l = 2;
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    io_mem_write(section->mr, addr1, val, 1);
                    l = 1;
                }
            } else if (!section->readonly) {
                ram_addr_t addr1;
                addr1 = memory_region_get_ram_addr(section->mr)
                    + memory_region_section_addr(section, addr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                invalidate_and_set_dirty(addr1, l);
                qemu_put_ram_ptr(ptr);
            }
            /* readonly RAM section: nothing is written, l is still consumed */
        } else {
            if (!(memory_region_is_ram(section->mr) ||
                  memory_region_is_romd(section->mr))) {
                target_phys_addr_t addr1;
                /* I/O case */
                addr1 = memory_region_section_addr(section, addr);
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit read access */
                    val = io_mem_read(section->mr, addr1, 4);
                    stl_p(buf, val);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit read access */
                    val = io_mem_read(section->mr, addr1, 2);
                    stw_p(buf, val);
                    l = 2;
                } else {
                    /* 8 bit read access */
                    val = io_mem_read(section->mr, addr1, 1);
                    stb_p(buf, val);
                    l = 1;
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(section->mr->ram_addr
                                       + memory_region_section_addr(section,
                                                                    addr));
                memcpy(buf, ptr, l);
                qemu_put_ram_ptr(ptr);
            }
        }
        /* advance by the amount actually transferred in this step */
        len -= l;
        buf += l;
        addr += l;
    }
}
3508
3509 /* used for ROM loading : can write in RAM and ROM */
3510 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3511 const uint8_t *buf, int len)
3512 {
3513 int l;
3514 uint8_t *ptr;
3515 target_phys_addr_t page;
3516 MemoryRegionSection *section;
3517
3518 while (len > 0) {
3519 page = addr & TARGET_PAGE_MASK;
3520 l = (page + TARGET_PAGE_SIZE) - addr;
3521 if (l > len)
3522 l = len;
3523 section = phys_page_find(page >> TARGET_PAGE_BITS);
3524
3525 if (!(memory_region_is_ram(section->mr) ||
3526 memory_region_is_romd(section->mr))) {
3527 /* do nothing */
3528 } else {
3529 unsigned long addr1;
3530 addr1 = memory_region_get_ram_addr(section->mr)
3531 + memory_region_section_addr(section, addr);
3532 /* ROM/RAM case */
3533 ptr = qemu_get_ram_ptr(addr1);
3534 memcpy(ptr, buf, l);
3535 invalidate_and_set_dirty(addr1, l);
3536 qemu_put_ram_ptr(ptr);
3537 }
3538 len -= l;
3539 buf += l;
3540 addr += l;
3541 }
3542 }
3543
/* Temporary buffer handed out by cpu_physical_memory_map() when the
 * requested range cannot be mapped directly. */
typedef struct {
    void *buffer;               /* host buffer; NULL while unused */
    target_phys_addr_t addr;    /* guest physical address it stands in for */
    target_phys_addr_t len;     /* number of bytes covered */
} BounceBuffer;

/* The single, file-wide bounce buffer. */
static BounceBuffer bounce;
3551
/* One registered callback to invoke from cpu_notify_map_clients(). */
typedef struct MapClient {
    void *opaque;                   /* argument passed back to callback */
    void (*callback)(void *opaque);
    QLIST_ENTRY(MapClient) link;    /* linkage in map_client_list */
} MapClient;

/* All currently registered map clients. */
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);
3560
3561 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3562 {
3563 MapClient *client = g_malloc(sizeof(*client));
3564
3565 client->opaque = opaque;
3566 client->callback = callback;
3567 QLIST_INSERT_HEAD(&map_client_list, client, link);
3568 return client;
3569 }
3570
3571 void cpu_unregister_map_client(void *_client)
3572 {
3573 MapClient *client = (MapClient *)_client;
3574
3575 QLIST_REMOVE(client, link);
3576 g_free(client);
3577 }
3578
3579 static void cpu_notify_map_clients(void)
3580 {
3581 MapClient *client;
3582
3583 while (!QLIST_EMPTY(&map_client_list)) {
3584 client = QLIST_FIRST(&map_client_list);
3585 client->callback(client->opaque);
3586 cpu_unregister_map_client(client);
3587 }
3588 }
3589
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 *
 * The range is walked page by page.  Leading pages that are writable RAM
 * are accumulated and finally resolved with qemu_ram_ptr_length().  If the
 * very first page is not writable RAM, the single bounce buffer is used
 * instead (at most one page; pre-filled from guest memory when !is_write).
 */
void *cpu_physical_memory_map(target_phys_addr_t addr,
                              target_phys_addr_t *plen,
                              int is_write)
{
    target_phys_addr_t len = *plen;
    target_phys_addr_t todo = 0;    /* bytes of direct RAM gathered so far */
    int l;
    target_phys_addr_t page;
    MemoryRegionSection *section;
    ram_addr_t raddr = RAM_ADDR_MAX;    /* RAM address of the first page */
    ram_addr_t rlen;
    void *ret;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        /* bytes remaining in this page, clamped to the request */
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
            /* Not directly mappable.  Stop here if some RAM has already
               been gathered or if the bounce buffer is in use. */
            if (todo || bounce.buffer) {
                break;
            }
            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
            bounce.addr = addr;
            bounce.len = l;
            if (!is_write) {
                cpu_physical_memory_read(addr, bounce.buffer, l);
            }

            *plen = l;
            return bounce.buffer;
        }
        if (!todo) {
            /* remember where the directly mapped run starts */
            raddr = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
        }

        len -= l;
        addr += l;
        todo += l;
    }
    /* NOTE(review): when todo is 0 this passes RAM_ADDR_MAX with a zero
       length; the result depends on qemu_ram_ptr_length() - confirm. */
    rlen = todo;
    ret = qemu_ram_ptr_length(raddr, &rlen);
    *plen = rlen;
    return ret;
}
3645
3646 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3647 * Will also mark the memory as dirty if is_write == 1. access_len gives
3648 * the amount of memory that was actually read or written by the caller.
3649 */
3650 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3651 int is_write, target_phys_addr_t access_len)
3652 {
3653 if (buffer != bounce.buffer) {
3654 if (is_write) {
3655 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3656 while (access_len) {
3657 unsigned l;
3658 l = TARGET_PAGE_SIZE;
3659 if (l > access_len)
3660 l = access_len;
3661 invalidate_and_set_dirty(addr1, l);
3662 addr1 += l;
3663 access_len -= l;
3664 }
3665 }
3666 if (xen_enabled()) {
3667 xen_invalidate_map_cache_entry(buffer);
3668 }
3669 return;
3670 }
3671 if (is_write) {
3672 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3673 }
3674 qemu_vfree(bounce.buffer);
3675 bounce.buffer = NULL;
3676 cpu_notify_map_clients();
3677 }
3678
3679 /* warning: addr must be aligned */
3680 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
3681 enum device_endian endian)
3682 {
3683 uint8_t *ptr;
3684 uint32_t val;
3685 MemoryRegionSection *section;
3686
3687 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3688
3689 if (!(memory_region_is_ram(section->mr) ||
3690 memory_region_is_romd(section->mr))) {
3691 /* I/O case */
3692 addr = memory_region_section_addr(section, addr);
3693 val = io_mem_read(section->mr, addr, 4);
3694 #if defined(TARGET_WORDS_BIGENDIAN)
3695 if (endian == DEVICE_LITTLE_ENDIAN) {
3696 val = bswap32(val);
3697 }
3698 #else
3699 if (endian == DEVICE_BIG_ENDIAN) {
3700 val = bswap32(val);
3701 }
3702 #endif
3703 } else {
3704 /* RAM case */
3705 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3706 & TARGET_PAGE_MASK)
3707 + memory_region_section_addr(section, addr));
3708 switch (endian) {
3709 case DEVICE_LITTLE_ENDIAN:
3710 val = ldl_le_p(ptr);
3711 break;
3712 case DEVICE_BIG_ENDIAN:
3713 val = ldl_be_p(ptr);
3714 break;
3715 default:
3716 val = ldl_p(ptr);
3717 break;
3718 }
3719 }
3720 return val;
3721 }
3722
/* 32-bit physical load with DEVICE_NATIVE_ENDIAN; addr must be aligned. */
uint32_t ldl_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}
3727
/* 32-bit physical load with DEVICE_LITTLE_ENDIAN; addr must be aligned. */
uint32_t ldl_le_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}
3732
/* 32-bit physical load with DEVICE_BIG_ENDIAN; addr must be aligned. */
uint32_t ldl_be_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
3737
/*
 * Load a 64-bit value from guest physical address addr.
 *
 * For RAM (or ROM-device) backed pages the value is read through the host
 * pointer with the requested endianness.  For I/O regions the value is
 * assembled from two 32-bit io_mem_read() calls ordered by the target's
 * endianness only; the `endian` argument is NOT consulted on that path
 * (see the XXX note below).
 *
 * warning: addr must be aligned
 */
static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);

        /* XXX This is broken when device endian != cpu endian.
               Fix and add "endian" variable check */
#ifdef TARGET_WORDS_BIGENDIAN
        /* big-endian target: first word is the high half */
        val = io_mem_read(section->mr, addr, 4) << 32;
        val |= io_mem_read(section->mr, addr + 4, 4);
#else
        /* little-endian target: first word is the low half */
        val = io_mem_read(section->mr, addr, 4);
        val |= io_mem_read(section->mr, addr + 4, 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}
3781
/* 64-bit physical load with DEVICE_NATIVE_ENDIAN; addr must be aligned. */
uint64_t ldq_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}
3786
/* 64-bit physical load with DEVICE_LITTLE_ENDIAN; addr must be aligned. */
uint64_t ldq_le_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}
3791
/* 64-bit physical load with DEVICE_BIG_ENDIAN; addr must be aligned. */
uint64_t ldq_be_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
3796
3797 /* XXX: optimize */
3798 uint32_t ldub_phys(target_phys_addr_t addr)
3799 {
3800 uint8_t val;
3801 cpu_physical_memory_read(addr, &val, 1);
3802 return val;
3803 }
3804
3805 /* warning: addr must be aligned */
3806 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
3807 enum device_endian endian)
3808 {
3809 uint8_t *ptr;
3810 uint64_t val;
3811 MemoryRegionSection *section;
3812
3813 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3814
3815 if (!(memory_region_is_ram(section->mr) ||
3816 memory_region_is_romd(section->mr))) {
3817 /* I/O case */
3818 addr = memory_region_section_addr(section, addr);
3819 val = io_mem_read(section->mr, addr, 2);
3820 #if defined(TARGET_WORDS_BIGENDIAN)
3821 if (endian == DEVICE_LITTLE_ENDIAN) {
3822 val = bswap16(val);
3823 }
3824 #else
3825 if (endian == DEVICE_BIG_ENDIAN) {
3826 val = bswap16(val);
3827 }
3828 #endif
3829 } else {
3830 /* RAM case */
3831 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3832 & TARGET_PAGE_MASK)
3833 + memory_region_section_addr(section, addr));
3834 switch (endian) {
3835 case DEVICE_LITTLE_ENDIAN:
3836 val = lduw_le_p(ptr);
3837 break;
3838 case DEVICE_BIG_ENDIAN:
3839 val = lduw_be_p(ptr);
3840 break;
3841 default:
3842 val = lduw_p(ptr);
3843 break;
3844 }
3845 }
3846 return val;
3847 }
3848
/* 16-bit physical load with DEVICE_NATIVE_ENDIAN; addr must be aligned. */
uint32_t lduw_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}
3853
/* 16-bit physical load with DEVICE_LITTLE_ENDIAN; addr must be aligned. */
uint32_t lduw_le_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}
3858
/* 16-bit physical load with DEVICE_BIG_ENDIAN; addr must be aligned. */
uint32_t lduw_be_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
3863
3864 /* warning: addr must be aligned. The ram page is not masked as dirty
3865 and the code inside is not invalidated. It is useful if the dirty
3866 bits are used to track modified PTEs */
3867 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3868 {
3869 uint8_t *ptr;
3870 MemoryRegionSection *section;
3871
3872 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3873
3874 if (!memory_region_is_ram(section->mr) || section->readonly) {
3875 addr = memory_region_section_addr(section, addr);
3876 if (memory_region_is_ram(section->mr)) {
3877 section = &phys_sections[phys_section_rom];
3878 }
3879 io_mem_write(section->mr, addr, val, 4);
3880 } else {
3881 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3882 & TARGET_PAGE_MASK)
3883 + memory_region_section_addr(section, addr);
3884 ptr = qemu_get_ram_ptr(addr1);
3885 stl_p(ptr, val);
3886
3887 if (unlikely(in_migration)) {
3888 if (!cpu_physical_memory_is_dirty(addr1)) {
3889 /* invalidate code */
3890 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3891 /* set dirty bit */
3892 cpu_physical_memory_set_dirty_flags(
3893 addr1, (0xff & ~CODE_DIRTY_FLAG));
3894 }
3895 }
3896 }
3897 }
3898
3899 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
3900 {
3901 uint8_t *ptr;
3902 MemoryRegionSection *section;
3903
3904 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3905
3906 if (!memory_region_is_ram(section->mr) || section->readonly) {
3907 addr = memory_region_section_addr(section, addr);
3908 if (memory_region_is_ram(section->mr)) {
3909 section = &phys_sections[phys_section_rom];
3910 }
3911 #ifdef TARGET_WORDS_BIGENDIAN
3912 io_mem_write(section->mr, addr, val >> 32, 4);
3913 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3914 #else
3915 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3916 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3917 #endif
3918 } else {
3919 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3920 & TARGET_PAGE_MASK)
3921 + memory_region_section_addr(section, addr));
3922 stq_p(ptr, val);
3923 }
3924 }
3925
3926 /* warning: addr must be aligned */
3927 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
3928 enum device_endian endian)
3929 {
3930 uint8_t *ptr;
3931 MemoryRegionSection *section;
3932
3933 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3934
3935 if (!memory_region_is_ram(section->mr) || section->readonly) {
3936 addr = memory_region_section_addr(section, addr);
3937 if (memory_region_is_ram(section->mr)) {
3938 section = &phys_sections[phys_section_rom];
3939 }
3940 #if defined(TARGET_WORDS_BIGENDIAN)
3941 if (endian == DEVICE_LITTLE_ENDIAN) {
3942 val = bswap32(val);
3943 }
3944 #else
3945 if (endian == DEVICE_BIG_ENDIAN) {
3946 val = bswap32(val);
3947 }
3948 #endif
3949 io_mem_write(section->mr, addr, val, 4);
3950 } else {
3951 unsigned long addr1;
3952 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3953 + memory_region_section_addr(section, addr);
3954 /* RAM case */
3955 ptr = qemu_get_ram_ptr(addr1);
3956 switch (endian) {
3957 case DEVICE_LITTLE_ENDIAN:
3958 stl_le_p(ptr, val);
3959 break;
3960 case DEVICE_BIG_ENDIAN:
3961 stl_be_p(ptr, val);
3962 break;
3963 default:
3964 stl_p(ptr, val);
3965 break;
3966 }
3967 invalidate_and_set_dirty(addr1, 4);
3968 }
3969 }
3970
/* 32-bit physical store with DEVICE_NATIVE_ENDIAN; addr must be aligned. */
void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}
3975
/* 32-bit physical store with DEVICE_LITTLE_ENDIAN; addr must be aligned. */
void stl_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}
3980
/* 32-bit physical store with DEVICE_BIG_ENDIAN; addr must be aligned. */
void stl_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
3985
3986 /* XXX: optimize */
3987 void stb_phys(target_phys_addr_t addr, uint32_t val)
3988 {
3989 uint8_t v = val;
3990 cpu_physical_memory_write(addr, &v, 1);
3991 }
3992
3993 /* warning: addr must be aligned */
3994 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
3995 enum device_endian endian)
3996 {
3997 uint8_t *ptr;
3998 MemoryRegionSection *section;
3999
4000 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4001
4002 if (!memory_region_is_ram(section->mr) || section->readonly) {
4003 addr = memory_region_section_addr(section, addr);
4004 if (memory_region_is_ram(section->mr)) {
4005 section = &phys_sections[phys_section_rom];
4006 }
4007 #if defined(TARGET_WORDS_BIGENDIAN)
4008 if (endian == DEVICE_LITTLE_ENDIAN) {
4009 val = bswap16(val);
4010 }
4011 #else
4012 if (endian == DEVICE_BIG_ENDIAN) {
4013 val = bswap16(val);
4014 }
4015 #endif
4016 io_mem_write(section->mr, addr, val, 2);
4017 } else {
4018 unsigned long addr1;
4019 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4020 + memory_region_section_addr(section, addr);
4021 /* RAM case */
4022 ptr = qemu_get_ram_ptr(addr1);
4023 switch (endian) {
4024 case DEVICE_LITTLE_ENDIAN:
4025 stw_le_p(ptr, val);
4026 break;
4027 case DEVICE_BIG_ENDIAN:
4028 stw_be_p(ptr, val);
4029 break;
4030 default:
4031 stw_p(ptr, val);
4032 break;
4033 }
4034 invalidate_and_set_dirty(addr1, 2);
4035 }
4036 }
4037
/* 16-bit physical store with DEVICE_NATIVE_ENDIAN; addr must be aligned. */
void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}
4042
/* 16-bit physical store with DEVICE_LITTLE_ENDIAN; addr must be aligned. */
void stw_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}
4047
/* 16-bit physical store with DEVICE_BIG_ENDIAN; addr must be aligned. */
void stw_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
4052
4053 /* XXX: optimize */
4054 void stq_phys(target_phys_addr_t addr, uint64_t val)
4055 {
4056 val = tswap64(val);
4057 cpu_physical_memory_write(addr, &val, 8);
4058 }
4059
4060 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4061 {
4062 val = cpu_to_le64(val);
4063 cpu_physical_memory_write(addr, &val, 8);
4064 }
4065
4066 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4067 {
4068 val = cpu_to_be64(val);
4069 cpu_physical_memory_write(addr, &val, 8);
4070 }
4071
4072 /* virtual memory access for debug (includes writing to ROM) */
4073 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4074 uint8_t *buf, int len, int is_write)
4075 {
4076 int l;
4077 target_phys_addr_t phys_addr;
4078 target_ulong page;
4079
4080 while (len > 0) {
4081 page = addr & TARGET_PAGE_MASK;
4082 phys_addr = cpu_get_phys_page_debug(env, page);
4083 /* if no physical page mapped, return an error */
4084 if (phys_addr == -1)
4085 return -1;
4086 l = (page + TARGET_PAGE_SIZE) - addr;
4087 if (l > len)
4088 l = len;
4089 phys_addr += (addr & ~TARGET_PAGE_MASK);
4090 if (is_write)
4091 cpu_physical_memory_write_rom(phys_addr, buf, l);
4092 else
4093 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4094 len -= l;
4095 buf += l;
4096 addr += l;
4097 }
4098 return 0;
4099 }
4100 #endif
4101
/* in deterministic execution mode, instructions doing device I/Os
   must be at the end of the TB

   Looks up the TB containing host return address retaddr, restores the
   CPU state to the faulting instruction, invalidates that TB and
   generates a replacement limited to the instructions executed so far
   plus the I/O instruction (flagged CF_LAST_IO).  Control then leaves
   through cpu_resume_from_signal(). */
void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc(retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    /* instruction budget as it was when the TB was entered */
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred. */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn. */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB. If this is not
       the first instruction in a TB then re-execute the preceding
       branch. */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
        && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen. */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    /* save what is needed from the old TB before it is invalidated */
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception. In practice
       we have already translated the block once so it's probably ok. */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB. */
    cpu_resume_from_signal(env, NULL);
}
4160
4161 #if !defined(CONFIG_USER_ONLY)
4162
/*
 * Print translation-buffer statistics to f through cpu_fprintf.
 *
 * Walks all nb_tbs translation blocks to accumulate target code sizes,
 * the number of blocks spanning two pages, and the number of blocks with
 * one or two direct jumps, then prints those figures followed by the
 * flush/invalidate counters and tcg_dump_info().  All averages and
 * percentages are guarded against nb_tbs == 0.
 */
void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for(i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        /* a second page address other than -1 means the TB spans pages */
        if (tb->page_addr[1] != -1)
            cross_page++;
        /* 0xffff marks an unused direct-jump slot */
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
                cross_page,
                nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
}
4214
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
4228
4229 #endif
4230
4231 #ifndef CONFIG_USER_ONLY
4232 bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
4233 {
4234 MemoryRegionSection *section;
4235
4236 section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);
4237
4238 return !(memory_region_is_ram(section->mr) ||
4239 memory_region_is_romd(section->mr));
4240 }
4241 #endif