/* exec.c — from the qemu.git repository (git.proxmox.com mirror) */
1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
59
60 #include "cputlb.h"
61
62 #define WANT_EXEC_OBSOLETE
63 #include "exec-obsolete.h"
64
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_UNASSIGNED
68
69 /* make various TB consistency checks */
70 //#define DEBUG_TB_CHECK
71
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
74
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
79
80 #define SMC_BITMAP_USE_THRESHOLD 10
81
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88
89 #if defined(__arm__) || defined(__sparc_v9__)
90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
91 have limited branch ranges (possibly also PPC) so place it in a
92 section close to code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32) && !defined(_WIN64)
97 #define code_gen_section \
98 __attribute__((aligned (16)))
99 #else
100 #define code_gen_section \
101 __attribute__((aligned (32)))
102 #endif
103
104 uint8_t code_gen_prologue[1024] code_gen_section;
105 static uint8_t *code_gen_buffer;
106 static unsigned long code_gen_buffer_size;
107 /* threshold to flush the translated code buffer */
108 static unsigned long code_gen_buffer_max_size;
109 static uint8_t *code_gen_ptr;
110
111 #if !defined(CONFIG_USER_ONLY)
112 int phys_ram_fd;
113 static int in_migration;
114
115 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
116
117 static MemoryRegion *system_memory;
118 static MemoryRegion *system_io;
119
120 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
121 static MemoryRegion io_mem_subpage_ram;
122
123 #endif
124
125 CPUArchState *first_cpu;
126 /* current CPU in the current thread. It is only valid inside
127 cpu_exec() */
128 DEFINE_TLS(CPUArchState *,cpu_single_env);
129 /* 0 = Do not count executed instructions.
130 1 = Precise instruction counting.
131 2 = Adaptive rate instruction counting. */
132 int use_icount = 0;
133
/* Per-guest-page bookkeeping used by the translator. */
typedef struct PageDesc {
    /* list of TBs intersecting this ram page */
    TranslationBlock *first_tb;
    /* in order to optimize self modifying code, we count the number
       of lookups we do to a given page to use a bitmap */
    unsigned int code_write_count;
    uint8_t *code_bitmap;   /* built lazily once code_write_count is high */
#if defined(CONFIG_USER_ONLY)
    unsigned long flags;    /* PAGE_* protection/validity flags */
#endif
} PageDesc;
145
146 /* In system mode we want L1_MAP to be based on ram offsets,
147 while in user mode we want it to be based on virtual addresses. */
148 #if !defined(CONFIG_USER_ONLY)
149 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
150 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
151 #else
152 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
153 #endif
154 #else
155 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
156 #endif
157
158 /* Size of the L2 (and L3, etc) page tables. */
159 #define L2_BITS 10
160 #define L2_SIZE (1 << L2_BITS)
161
162 #define P_L2_LEVELS \
163 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
164
165 /* The bits remaining after N lower levels of page tables. */
166 #define V_L1_BITS_REM \
167 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
168
169 #if V_L1_BITS_REM < 4
170 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
171 #else
172 #define V_L1_BITS V_L1_BITS_REM
173 #endif
174
175 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
176
177 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
178
179 uintptr_t qemu_real_host_page_size;
180 uintptr_t qemu_host_page_size;
181 uintptr_t qemu_host_page_mask;
182
183 /* This is a multi-level map on the virtual address space.
184 The bottom level has pointers to PageDesc. */
185 static void *l1_map[V_L1_SIZE];
186
187 #if !defined(CONFIG_USER_ONLY)
188 typedef struct PhysPageEntry PhysPageEntry;
189
190 static MemoryRegionSection *phys_sections;
191 static unsigned phys_sections_nb, phys_sections_nb_alloc;
192 static uint16_t phys_section_unassigned;
193 static uint16_t phys_section_notdirty;
194 static uint16_t phys_section_rom;
195 static uint16_t phys_section_watch;
196
/* One slot of the physical page map: a leaf referencing a
   MemoryRegionSection, or an interior node referencing another table. */
struct PhysPageEntry {
    uint16_t is_leaf : 1;
    /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
    uint16_t ptr : 15;
};
202
203 /* Simple allocator for PhysPageEntry nodes */
204 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
205 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
206
207 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
208
209 /* This is a multi-level map on the physical address space.
210 The bottom level has pointers to MemoryRegionSections. */
211 static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
212
213 static void io_mem_init(void);
214 static void memory_map_init(void);
215
216 static MemoryRegion io_mem_watch;
217 #endif
218
219 /* log support */
220 #ifdef WIN32
221 static const char *logfilename = "qemu.log";
222 #else
223 static const char *logfilename = "/tmp/qemu.log";
224 #endif
225 FILE *logfile;
226 int loglevel;
227 static int log_append = 0;
228
229 /* statistics */
230 static int tb_flush_count;
231 static int tb_phys_invalidate_count;
232
#ifdef _WIN32
/* Make [addr, addr+size) executable; one VirtualProtect call suffices. */
static void map_exec(void *addr, long size)
{
    DWORD old_protect;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &old_protect);
}
#else
/* Make [addr, addr+size) executable: round the range out to whole host
   pages and mprotect it RWX. */
static void map_exec(void *addr, long size)
{
    unsigned long page_size = getpagesize();
    unsigned long first = (unsigned long)addr & ~(page_size - 1);
    unsigned long last = ((unsigned long)addr + size + page_size - 1)
                         & ~(page_size - 1);

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
258
/*
 * Initialize the host/target page size globals.  qemu_host_page_size is
 * forced to be at least TARGET_PAGE_SIZE; on BSD user-mode builds the
 * host's pre-existing mappings are additionally flagged PAGE_RESERVED so
 * the guest cannot map over them.
 */
static void page_init(void)
{
    /* NOTE: we can always suppose that qemu_host_page_size >=
       TARGET_PAGE_SIZE */
#ifdef _WIN32
    {
        SYSTEM_INFO system_info;

        GetSystemInfo(&system_info);
        qemu_real_host_page_size = system_info.dwPageSize;
    }
#else
    qemu_real_host_page_size = getpagesize();
#endif
    /* only supply a default when it has not been set already */
    if (qemu_host_page_size == 0)
        qemu_host_page_size = qemu_real_host_page_size;
    if (qemu_host_page_size < TARGET_PAGE_SIZE)
        qemu_host_page_size = TARGET_PAGE_SIZE;
    qemu_host_page_mask = ~(qemu_host_page_size - 1);

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        /* FreeBSD >= 7.0: enumerate host mappings via libutil */
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        /* mapping extends past the guest address space:
                           reserve everything up to the top */
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        /* fall back to parsing the Linux-compat maps file */
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            do {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}
344
/*
 * Walk the multi-level l1_map and return the PageDesc for page @index.
 * With @alloc non-zero, missing intermediate tables and leaf arrays are
 * created; otherwise NULL is returned as soon as a level is absent.
 */
static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
{
    PageDesc *pd;
    void **lp;
    int i;

#if defined(CONFIG_USER_ONLY)
    /* We can't use g_malloc because it may recurse into a locked mutex. */
# define ALLOC(P, SIZE)                                 \
    do {                                                \
        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
    } while (0)
#else
# define ALLOC(P, SIZE) \
    do { P = g_malloc0(SIZE); } while (0)
#endif

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));

    /* Level 2..N-1.  */
    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
        void **p = *lp;

        if (p == NULL) {
            if (!alloc) {
                return NULL;
            }
            ALLOC(p, sizeof(void *) * L2_SIZE);
            *lp = p;
        }

        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
    }

    /* Bottom level: an array of PageDesc. */
    pd = *lp;
    if (pd == NULL) {
        if (!alloc) {
            return NULL;
        }
        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
        *lp = pd;
    }

#undef ALLOC

    return pd + (index & (L2_SIZE - 1));
}
394
/* Non-allocating lookup: returns the PageDesc for page @index, or NULL
   if no code has ever been tracked on that page. */
static inline PageDesc *page_find(tb_page_addr_t index)
{
    return page_find_alloc(index, 0);
}
399
400 #if !defined(CONFIG_USER_ONLY)
401
402 static void phys_map_node_reserve(unsigned nodes)
403 {
404 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
405 typedef PhysPageEntry Node[L2_SIZE];
406 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
407 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
408 phys_map_nodes_nb + nodes);
409 phys_map_nodes = g_renew(Node, phys_map_nodes,
410 phys_map_nodes_nb_alloc);
411 }
412 }
413
414 static uint16_t phys_map_node_alloc(void)
415 {
416 unsigned i;
417 uint16_t ret;
418
419 ret = phys_map_nodes_nb++;
420 assert(ret != PHYS_MAP_NODE_NIL);
421 assert(ret != phys_map_nodes_nb_alloc);
422 for (i = 0; i < L2_SIZE; ++i) {
423 phys_map_nodes[ret][i].is_leaf = 0;
424 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
425 }
426 return ret;
427 }
428
/* Discard every allocated node; the backing array is kept for reuse. */
static void phys_map_nodes_reset(void)
{
    phys_map_nodes_nb = 0;
}
433
434
/*
 * Recursive worker for phys_page_set(): walk one level of the physical
 * page map, allocating intermediate nodes on demand, and point every
 * covered slot at section @leaf.  @index and @nb are advanced/consumed
 * in place so the recursion shares the caller's loop state.
 */
static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
                                target_phys_addr_t *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    /* number of target pages covered by one slot at this level */
    target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);

    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc();
        p = phys_map_nodes[lp->ptr];
        if (level == 0) {
            /* fresh bottom-level node: every page starts unassigned */
            for (i = 0; i < L2_SIZE; i++) {
                p[i].is_leaf = 1;
                p[i].ptr = phys_section_unassigned;
            }
        }
    } else {
        p = phys_map_nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];

    while (*nb && lp < &p[L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            /* range covers this whole slot: store the leaf here rather
               than descending further */
            lp->is_leaf = true;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}
469
/* Map the @nb target-physical pages starting at page @index to the
   memory region section @leaf. */
static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(3 * P_L2_LEVELS);

    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}
478
479 MemoryRegionSection *phys_page_find(target_phys_addr_t index)
480 {
481 PhysPageEntry lp = phys_map;
482 PhysPageEntry *p;
483 int i;
484 uint16_t s_index = phys_section_unassigned;
485
486 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
487 if (lp.ptr == PHYS_MAP_NODE_NIL) {
488 goto not_found;
489 }
490 p = phys_map_nodes[lp.ptr];
491 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
492 }
493
494 s_index = lp.ptr;
495 not_found:
496 return &phys_sections[s_index];
497 }
498
499 bool memory_region_is_unassigned(MemoryRegion *mr)
500 {
501 return mr != &io_mem_ram && mr != &io_mem_rom
502 && mr != &io_mem_notdirty && !mr->rom_device
503 && mr != &io_mem_watch;
504 }
505
506 #define mmap_lock() do { } while(0)
507 #define mmap_unlock() do { } while(0)
508 #endif
509
510 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
511
512 #if defined(CONFIG_USER_ONLY)
513 /* Currently it is not recommended to allocate big chunks of data in
514 user mode. It will change when a dedicated libc will be used */
515 #define USE_STATIC_CODE_GEN_BUFFER
516 #endif
517
518 #ifdef USE_STATIC_CODE_GEN_BUFFER
519 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
520 __attribute__((aligned (CODE_GEN_ALIGN)));
521 #endif
522
/*
 * Allocate the buffer that will hold generated host code, either a
 * static array (user mode) or an mmap'd/malloc'd region whose placement
 * satisfies the host architecture's branch-range constraints.  Also
 * sizes the TB array and the flush threshold from the buffer size.
 */
static void code_gen_alloc(unsigned long tb_size)
{
#ifdef USE_STATIC_CODE_GEN_BUFFER
    code_gen_buffer = static_code_gen_buffer;
    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
    map_exec(code_gen_buffer, code_gen_buffer_size);
#else
    code_gen_buffer_size = tb_size;
    if (code_gen_buffer_size == 0) {
#if defined(CONFIG_USER_ONLY)
        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
#else
        /* XXX: needs adjustments */
        code_gen_buffer_size = (unsigned long)(ram_size / 4);
#endif
    }
    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
    /* The code gen buffer location may have constraints depending on
       the host cpu and OS */
#if defined(__linux__)
    {
        int flags;
        void *start = NULL;

        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        /* force the low 2GB so 32-bit relative branches reach it */
        flags |= MAP_32BIT;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        start = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024))
            code_gen_buffer_size = (512 * 1024 * 1024);
#elif defined(__arm__)
        /* Keep the buffer no bigger than 16MB to branch between blocks */
        if (code_gen_buffer_size > 16 * 1024 * 1024)
            code_gen_buffer_size = 16 * 1024 * 1024;
#elif defined(__s390x__)
        /* Map the buffer so that we can use direct calls and branches.  */
        /* We have a +- 4GB range on the branches; leave some slop.  */
        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
        }
        start = (void *)0x90000000UL;
#endif
        code_gen_buffer = mmap(start, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
    || defined(__DragonFly__) || defined(__OpenBSD__) \
    || defined(__NetBSD__)
    {
        int flags;
        void *addr = NULL;
        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
         * 0x40000000 is free */
        flags |= MAP_FIXED;
        addr = (void *)0x40000000;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        addr = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
            code_gen_buffer_size = (512 * 1024 * 1024);
        }
#endif
        code_gen_buffer = mmap(addr, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#else
    code_gen_buffer = g_malloc(code_gen_buffer_size);
    map_exec(code_gen_buffer, code_gen_buffer_size);
#endif
#endif /* !USE_STATIC_CODE_GEN_BUFFER */
    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
    /* leave room for one worst-case TB at the end of the buffer */
    code_gen_buffer_max_size = code_gen_buffer_size -
        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
}
622
/* Must be called before using the QEMU cpus. 'tb_size' is the size
   (in bytes) allocated to the translation buffer. Zero means default
   size. */
void tcg_exec_init(unsigned long tb_size)
{
    cpu_gen_init();
    code_gen_alloc(tb_size);
    /* generation starts at the beginning of the fresh buffer */
    code_gen_ptr = code_gen_buffer;
    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
    page_init();
#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
    /* There's no guest base to take into account, so go ahead and
       initialize the prologue now.  */
    tcg_prologue_init(&tcg_ctx);
#endif
}
639
640 bool tcg_enabled(void)
641 {
642 return code_gen_buffer != NULL;
643 }
644
/* One-time global init: set up the memory and I/O subsystems for
   system emulation; nothing to do in user-mode emulation. */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
652
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)

/* Fix up target-independent CPU state after an incoming migration. */
static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUArchState *env = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    env->interrupt_request &= ~0x01;
    /* cached translations cannot be trusted after a load */
    tlb_flush(env, 1);

    return 0;
}

/* Migration description of the target-independent CPU state. */
static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUArchState),
        VMSTATE_UINT32(interrupt_request, CPUArchState),
        VMSTATE_END_OF_LIST()
    }
};
#endif
680
681 CPUArchState *qemu_get_cpu(int cpu)
682 {
683 CPUArchState *env = first_cpu;
684
685 while (env) {
686 if (env->cpu_index == cpu)
687 break;
688 env = env->next_cpu;
689 }
690
691 return env;
692 }
693
/* Append @env to the global CPU list, assign it the next free
   cpu_index, and register it with savevm/migration when built in. */
void cpu_exec_init(CPUArchState *env)
{
    CPUArchState **penv;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    /* user-mode threads may race to register their CPUs */
    cpu_list_lock();
#endif
    env->next_cpu = NULL;
    /* walk to the tail of the list, counting entries for the new index */
    penv = &first_cpu;
    cpu_index = 0;
    while (*penv != NULL) {
        penv = &(*penv)->next_cpu;
        cpu_index++;
    }
    env->cpu_index = cpu_index;
    env->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    env->thread_id = qemu_get_thread_id();
#endif
    *penv = env;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
#endif
}
726
727 /* Allocate a new translation block. Flush the translation buffer if
728 too many translation blocks or too much generated code. */
729 static TranslationBlock *tb_alloc(target_ulong pc)
730 {
731 TranslationBlock *tb;
732
733 if (nb_tbs >= code_gen_max_blocks ||
734 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
735 return NULL;
736 tb = &tbs[nb_tbs++];
737 tb->pc = pc;
738 tb->cflags = 0;
739 return tb;
740 }
741
742 void tb_free(TranslationBlock *tb)
743 {
744 /* In practice this is mostly used for single use temporary TB
745 Ignore the hard cases and just back up if this TB happens to
746 be the last one generated. */
747 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
748 code_gen_ptr = tb->tc_ptr;
749 nb_tbs--;
750 }
751 }
752
753 static inline void invalidate_page_bitmap(PageDesc *p)
754 {
755 if (p->code_bitmap) {
756 g_free(p->code_bitmap);
757 p->code_bitmap = NULL;
758 }
759 p->code_write_count = 0;
760 }
761
762 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
763
764 static void page_flush_tb_1 (int level, void **lp)
765 {
766 int i;
767
768 if (*lp == NULL) {
769 return;
770 }
771 if (level == 0) {
772 PageDesc *pd = *lp;
773 for (i = 0; i < L2_SIZE; ++i) {
774 pd[i].first_tb = NULL;
775 invalidate_page_bitmap(pd + i);
776 }
777 } else {
778 void **pp = *lp;
779 for (i = 0; i < L2_SIZE; ++i) {
780 page_flush_tb_1 (level - 1, pp + i);
781 }
782 }
783 }
784
785 static void page_flush_tb(void)
786 {
787 int i;
788 for (i = 0; i < V_L1_SIZE; i++) {
789 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
790 }
791 }
792
/* flush all the translation blocks */
/* XXX: tb_flush is currently not thread safe */
void tb_flush(CPUArchState *env1)
{
    CPUArchState *env;
#if defined(DEBUG_FLUSH)
    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
           (unsigned long)(code_gen_ptr - code_gen_buffer),
           nb_tbs, nb_tbs > 0 ?
           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
#endif
    /* sanity check: generated code must never overrun the buffer */
    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
        cpu_abort(env1, "Internal error: code buffer overflow\n");

    nb_tbs = 0;

    /* every CPU's TB lookup cache now holds dangling pointers */
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
    }

    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
    page_flush_tb();

    /* restart code generation from the beginning of the buffer */
    code_gen_ptr = code_gen_buffer;
    /* XXX: flush processor icache at this point if cache flush is
       expensive */
    tb_flush_count++;
}
821
822 #ifdef DEBUG_TB_CHECK
823
824 static void tb_invalidate_check(target_ulong address)
825 {
826 TranslationBlock *tb;
827 int i;
828 address &= TARGET_PAGE_MASK;
829 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
830 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
831 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
832 address >= tb->pc + tb->size)) {
833 printf("ERROR invalidate: address=" TARGET_FMT_lx
834 " PC=%08lx size=%04x\n",
835 address, (long)tb->pc, tb->size);
836 }
837 }
838 }
839 }
840
841 /* verify that all the pages have correct rights for code */
842 static void tb_page_check(void)
843 {
844 TranslationBlock *tb;
845 int i, flags1, flags2;
846
847 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
848 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
849 flags1 = page_get_flags(tb->pc);
850 flags2 = page_get_flags(tb->pc + tb->size - 1);
851 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
852 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
853 (long)tb->pc, tb->size, flags1, flags2);
854 }
855 }
856 }
857 }
858
859 #endif
860
861 /* invalidate one TB */
862 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
863 int next_offset)
864 {
865 TranslationBlock *tb1;
866 for(;;) {
867 tb1 = *ptb;
868 if (tb1 == tb) {
869 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
870 break;
871 }
872 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
873 }
874 }
875
876 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
877 {
878 TranslationBlock *tb1;
879 unsigned int n1;
880
881 for(;;) {
882 tb1 = *ptb;
883 n1 = (uintptr_t)tb1 & 3;
884 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
885 if (tb1 == tb) {
886 *ptb = tb1->page_next[n1];
887 break;
888 }
889 ptb = &tb1->page_next[n1];
890 }
891 }
892
/* Remove edge @n (the n'th jump out of @tb) from the circular list of
   TBs that jump into its target, then clear the link. */
static inline void tb_jmp_remove(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, **ptb;
    unsigned int n1;

    ptb = &tb->jmp_next[n];
    tb1 = *ptb;
    if (tb1) {
        /* find tb(n) in circular list */
        for(;;) {
            tb1 = *ptb;
            /* low two pointer bits encode the edge number (2 = head) */
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            if (n1 == 2) {
                ptb = &tb1->jmp_first;
            } else {
                ptb = &tb1->jmp_next[n1];
            }
        }
        /* now we can suppress tb(n) from the list */
        *ptb = tb->jmp_next[n];

        tb->jmp_next[n] = NULL;
    }
}
920
/* reset the jump entry 'n' of a TB so that it is not chained to
   another TB */
static inline void tb_reset_jump(TranslationBlock *tb, int n)
{
    /* re-point the jump at the TB's own code just past that branch */
    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
}
927
/*
 * Remove @tb from every structure that can reach it: the physical hash
 * table, both pages' TB lists, each CPU's TB lookup cache, and the jump
 * chains in both directions.  @page_addr names the page the caller is
 * already handling itself (-1 for none).
 */
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
    CPUArchState *env;
    PageDesc *p;
    unsigned int h, n1;
    tb_page_addr_t phys_pc;
    TranslationBlock *tb1, *tb2;

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
    h = tb_phys_hash_func(phys_pc);
    tb_remove(&tb_phys_hash[h], tb,
              offsetof(TranslationBlock, phys_hash_next));

    /* remove the TB from the page list */
    if (tb->page_addr[0] != page_addr) {
        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }
    /* the TB may span a second page */
    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }

    tb_invalidated_flag = 1;

    /* remove the TB from the hash list */
    h = tb_jmp_cache_hash_func(tb->pc);
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        if (env->tb_jmp_cache[h] == tb)
            env->tb_jmp_cache[h] = NULL;
    }

    /* suppress this TB from the two jump lists */
    tb_jmp_remove(tb, 0);
    tb_jmp_remove(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb1 = tb->jmp_first;
    for(;;) {
        n1 = (uintptr_t)tb1 & 3;
        if (n1 == 2)
            break;      /* tag 2 marks the circular list head */
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
        tb2 = tb1->jmp_next[n1];
        /* unchain tb1 so it no longer jumps into the dying TB */
        tb_reset_jump(tb1, n1);
        tb1->jmp_next[n1] = NULL;
        tb1 = tb2;
    }
    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */

    tb_phys_invalidate_count++;
}
983
/* Set bits [start, start + len) in the bitmap @tab.  Bit k lives in
   byte k >> 3 at position k & 7 (LSB first). */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int bit;
    int end = start + len;

    for (bit = start; bit < end; bit++) {
        tab[bit >> 3] |= (uint8_t)(1 << (bit & 7));
    }
}
1010
1011 static void build_page_bitmap(PageDesc *p)
1012 {
1013 int n, tb_start, tb_end;
1014 TranslationBlock *tb;
1015
1016 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1017
1018 tb = p->first_tb;
1019 while (tb != NULL) {
1020 n = (uintptr_t)tb & 3;
1021 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1022 /* NOTE: this is subtle as a TB may span two physical pages */
1023 if (n == 0) {
1024 /* NOTE: tb_end may be after the end of the page, but
1025 it is not a problem */
1026 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1027 tb_end = tb_start + tb->size;
1028 if (tb_end > TARGET_PAGE_SIZE)
1029 tb_end = TARGET_PAGE_SIZE;
1030 } else {
1031 tb_start = 0;
1032 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1033 }
1034 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1035 tb = tb->page_next[n];
1036 }
1037 }
1038
/*
 * Translate guest code at @pc/@cs_base (with translation @flags and
 * compile @cflags) into a fresh TranslationBlock.  If the TB or code
 * buffers are full everything is flushed first, after which allocation
 * cannot fail.  The TB is linked into its page(s) before returning.
 */
TranslationBlock *tb_gen_code(CPUArchState *env,
                              target_ulong pc, target_ulong cs_base,
                              int flags, int cflags)
{
    TranslationBlock *tb;
    uint8_t *tc_ptr;
    tb_page_addr_t phys_pc, phys_page2;
    target_ulong virt_page2;
    int code_gen_size;

    phys_pc = get_page_addr_code(env, pc);
    tb = tb_alloc(pc);
    if (!tb) {
        /* flush must be done */
        tb_flush(env);
        /* cannot fail at this point */
        tb = tb_alloc(pc);
        /* Don't forget to invalidate previous TB info.  */
        tb_invalidated_flag = 1;
    }
    tc_ptr = code_gen_ptr;
    tb->tc_ptr = tc_ptr;
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    cpu_gen_code(env, tb, &code_gen_size);
    /* advance the generation pointer, keeping it aligned */
    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));

    /* check next page if needed */
    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
    phys_page2 = -1;
    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
        phys_page2 = get_page_addr_code(env, virt_page2);
    }
    tb_link_page(tb, phys_pc, phys_page2);
    return tb;
}
1077
/* invalidate all TBs which intersect with the target physical page
   starting in range [start;end[. NOTE: start and end must refer to
   the same physical page. 'is_cpu_write_access' should be true if called
   from a real cpu write access: the virtual CPU will exit the current
   TB if code is modified inside this TB. */
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                                   int is_cpu_write_access)
{
    TranslationBlock *tb, *tb_next, *saved_tb;
    CPUArchState *env = cpu_single_env;
    tb_page_addr_t tb_start, tb_end;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    /* lazily resolved only when we hit a TB in range (see below) */
    int current_tb_not_found = is_cpu_write_access;
    TranslationBlock *current_tb = NULL;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif /* TARGET_HAS_PRECISE_SMC */

    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    if (!p->code_bitmap &&
        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
        is_cpu_write_access) {
        /* build code bitmap */
        build_page_bitmap(p);
    }

    /* we remove all the TBs in the range [start, end[ */
    /* XXX: see if in some cases it could be faster to invalidate all the code */
    tb = p->first_tb;
    while (tb != NULL) {
        /* the low 2 bits of the list pointer encode which page slot
           (0 or 1) of the TB this list entry belongs to */
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
        tb_next = tb->page_next[n];
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
            tb_end = tb_start + tb->size;
        } else {
            tb_start = tb->page_addr[1];
            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        /* only touch TBs whose [tb_start, tb_end) overlaps [start, end) */
        if (!(tb_end <= start || tb_start >= end)) {
#ifdef TARGET_HAS_PRECISE_SMC
            if (current_tb_not_found) {
                current_tb_not_found = 0;
                current_tb = NULL;
                if (env->mem_io_pc) {
                    /* now we have a real cpu fault */
                    current_tb = tb_find_pc(env->mem_io_pc);
                }
            }
            if (current_tb == tb &&
                (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                its execution. We could be more precise by checking
                that the modification is after the current PC, but it
                would require a specialized function to partially
                restore the CPU state */

                current_tb_modified = 1;
                cpu_restore_state(current_tb, env, env->mem_io_pc);
                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                     &current_flags);
            }
#endif /* TARGET_HAS_PRECISE_SMC */
            /* we need to do that to handle the case where a signal
               occurs while doing tb_phys_invalidate() */
            saved_tb = NULL;
            if (env) {
                saved_tb = env->current_tb;
                env->current_tb = NULL;
            }
            tb_phys_invalidate(tb, -1);
            if (env) {
                env->current_tb = saved_tb;
                if (env->interrupt_request && env->current_tb)
                    cpu_interrupt(env, env->interrupt_request);
            }
        }
        tb = tb_next;
    }
#if !defined(CONFIG_USER_ONLY)
    /* if no code remaining, no need to continue to use slow writes */
    if (!p->first_tb) {
        invalidate_page_bitmap(p);
        if (is_cpu_write_access) {
            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
        }
    }
#endif
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, NULL);
    }
#endif
}
1187
1188 /* len must be <= 8 and start must be a multiple of len */
1189 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1190 {
1191 PageDesc *p;
1192 int offset, b;
1193 #if 0
1194 if (1) {
1195 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1196 cpu_single_env->mem_io_vaddr, len,
1197 cpu_single_env->eip,
1198 cpu_single_env->eip +
1199 (intptr_t)cpu_single_env->segs[R_CS].base);
1200 }
1201 #endif
1202 p = page_find(start >> TARGET_PAGE_BITS);
1203 if (!p)
1204 return;
1205 if (p->code_bitmap) {
1206 offset = start & ~TARGET_PAGE_MASK;
1207 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1208 if (b & ((1 << len) - 1))
1209 goto do_invalidate;
1210 } else {
1211 do_invalidate:
1212 tb_invalidate_phys_page_range(start, start + len, 1);
1213 }
1214 }
1215
#if !defined(CONFIG_SOFTMMU)
/* Invalidate every TB on the page containing 'addr' (user-mode only).
   'pc'/'puc' identify the faulting write when called from a signal
   handler so that a self-modifying current TB can be regenerated. */
static void tb_invalidate_phys_page(tb_page_addr_t addr,
                                    uintptr_t pc, void *puc)
{
    TranslationBlock *tb;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    TranslationBlock *current_tb = NULL;
    CPUArchState *env = cpu_single_env;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif

    addr &= TARGET_PAGE_MASK;
    p = page_find(addr >> TARGET_PAGE_BITS);
    if (!p)
        return;
    tb = p->first_tb;
#ifdef TARGET_HAS_PRECISE_SMC
    if (tb && pc != 0) {
        current_tb = tb_find_pc(pc);
    }
#endif
    while (tb != NULL) {
        /* low 2 bits of the list pointer select the TB's page slot */
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
#ifdef TARGET_HAS_PRECISE_SMC
        if (current_tb == tb &&
            (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                   its execution. We could be more precise by checking
                   that the modification is after the current PC, but it
                   would require a specialized function to partially
                   restore the CPU state */

            current_tb_modified = 1;
            cpu_restore_state(current_tb, env, pc);
            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                 &current_flags);
        }
#endif /* TARGET_HAS_PRECISE_SMC */
        tb_phys_invalidate(tb, addr);
        tb = tb->page_next[n];
    }
    p->first_tb = NULL;
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, puc);
    }
#endif
}
#endif
1276
/* add the tb in the target page and protect it if necessary */
static inline void tb_alloc_page(TranslationBlock *tb,
                                 unsigned int n, tb_page_addr_t page_addr)
{
    PageDesc *p;
#ifndef CONFIG_USER_ONLY
    bool page_already_protected;
#endif

    tb->page_addr[n] = page_addr;
    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
    tb->page_next[n] = p->first_tb;
#ifndef CONFIG_USER_ONLY
    /* must be sampled before we prepend ourselves to the list */
    page_already_protected = p->first_tb != NULL;
#endif
    /* tag the low bits of the list pointer with the page slot index */
    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
    invalidate_page_bitmap(p);

#if defined(TARGET_HAS_SMC) || 1

#if defined(CONFIG_USER_ONLY)
    if (p->flags & PAGE_WRITE) {
        target_ulong addr;
        PageDesc *p2;
        int prot;

        /* force the host page as non writable (writes will have a
           page fault + mprotect overhead) */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        /* a host page may cover several target pages; accumulate the
           union of their flags and clear PAGE_WRITE on all of them */
        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
            addr += TARGET_PAGE_SIZE) {

            p2 = page_find (addr >> TARGET_PAGE_BITS);
            if (!p2)
                continue;
            prot |= p2->flags;
            p2->flags &= ~PAGE_WRITE;
          }
        mprotect(g2h(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
#ifdef DEBUG_TB_INVALIDATE
        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
               page_addr);
#endif
    }
#else
    /* if some code is already present, then the pages are already
       protected. So we handle the case where only the first TB is
       allocated in a physical page */
    if (!page_already_protected) {
        tlb_protect_code(page_addr);
    }
#endif

#endif /* TARGET_HAS_SMC */
}
1334
/* add a new TB and link it to the physical page tables. phys_page2 is
   (-1) to indicate that only one page contains the TB. */
void tb_link_page(TranslationBlock *tb,
                  tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
{
    unsigned int h;
    TranslationBlock **ptb;

    /* Grab the mmap lock to stop another thread invalidating this TB
       before we are done. */
    mmap_lock();
    /* add in the physical hash table */
    h = tb_phys_hash_func(phys_pc);
    ptb = &tb_phys_hash[h];
    tb->phys_hash_next = *ptb;
    *ptb = tb;

    /* add in the page list */
    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
    if (phys_page2 != -1)
        tb_alloc_page(tb, 1, phys_page2);
    else
        tb->page_addr[1] = -1;

    /* the TB is its own circular jump list head; bit pattern 2 marks
       the head entry (see tb_reset_jump_recursive2) */
    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
    tb->jmp_next[0] = NULL;
    tb->jmp_next[1] = NULL;

    /* init original jump addresses */
    if (tb->tb_next_offset[0] != 0xffff)
        tb_reset_jump(tb, 0);
    if (tb->tb_next_offset[1] != 0xffff)
        tb_reset_jump(tb, 1);

#ifdef DEBUG_TB_CHECK
    tb_page_check();
#endif
    mmap_unlock();
}
1374
1375 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1376 tb[1].tc_ptr. Return NULL if not found */
1377 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1378 {
1379 int m_min, m_max, m;
1380 uintptr_t v;
1381 TranslationBlock *tb;
1382
1383 if (nb_tbs <= 0)
1384 return NULL;
1385 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1386 tc_ptr >= (uintptr_t)code_gen_ptr) {
1387 return NULL;
1388 }
1389 /* binary search (cf Knuth) */
1390 m_min = 0;
1391 m_max = nb_tbs - 1;
1392 while (m_min <= m_max) {
1393 m = (m_min + m_max) >> 1;
1394 tb = &tbs[m];
1395 v = (uintptr_t)tb->tc_ptr;
1396 if (v == tc_ptr)
1397 return tb;
1398 else if (tc_ptr < v) {
1399 m_max = m - 1;
1400 } else {
1401 m_min = m + 1;
1402 }
1403 }
1404 return &tbs[m_max];
1405 }
1406
1407 static void tb_reset_jump_recursive(TranslationBlock *tb);
1408
/* Unchain jump slot n of 'tb': remove tb from the incoming-jump list of
   the TB it jumps to, patch the generated code back to its default
   target, and recursively unchain the destination TB.  List pointers
   carry a 2-bit tag: 0/1 = which jmp_next slot, 2 = list head. */
static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, *tb_next, **ptb;
    unsigned int n1;

    tb1 = tb->jmp_next[n];
    if (tb1 != NULL) {
        /* find head of list */
        for(;;) {
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == 2)
                break;
            tb1 = tb1->jmp_next[n1];
        }
        /* we are now sure now that tb jumps to tb1 */
        tb_next = tb1;

        /* remove tb from the jmp_first list */
        ptb = &tb_next->jmp_first;
        for(;;) {
            tb1 = *ptb;
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            ptb = &tb1->jmp_next[n1];
        }
        /* splice our entry out of the singly-linked list */
        *ptb = tb->jmp_next[n];
        tb->jmp_next[n] = NULL;

        /* suppress the jump to next tb in generated code */
        tb_reset_jump(tb, n);

        /* suppress jumps in the tb on which we could have jumped */
        tb_reset_jump_recursive(tb_next);
    }
}
1447
1448 static void tb_reset_jump_recursive(TranslationBlock *tb)
1449 {
1450 tb_reset_jump_recursive2(tb, 0);
1451 tb_reset_jump_recursive2(tb, 1);
1452 }
1453
1454 #if defined(TARGET_HAS_ICE)
1455 #if defined(CONFIG_USER_ONLY)
/* Throw away any translated code containing the breakpoint address.
   In user-mode emulation the guest PC is used directly as the page
   address. */
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
1460 #else
1461 void tb_invalidate_phys_addr(target_phys_addr_t addr)
1462 {
1463 ram_addr_t ram_addr;
1464 MemoryRegionSection *section;
1465
1466 section = phys_page_find(addr >> TARGET_PAGE_BITS);
1467 if (!(memory_region_is_ram(section->mr)
1468 || (section->mr->rom_device && section->mr->readable))) {
1469 return;
1470 }
1471 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1472 + memory_region_section_addr(section, addr);
1473 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1474 }
1475
/* Throw away any translated code containing the breakpoint address:
   translate the guest-virtual PC to a physical address first. */
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc));
}
1480 #endif
1481 #endif /* TARGET_HAS_ICE */
1482
1483 #if defined(CONFIG_USER_ONLY)
/* User-mode stub: watchpoints are not supported, so there is never
   anything to remove. */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)

{
}
1488
/* User-mode stub: watchpoints are not supported; always fails. */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
1494 #else
1495 /* Add a watchpoint. */
1496 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1497 int flags, CPUWatchpoint **watchpoint)
1498 {
1499 target_ulong len_mask = ~(len - 1);
1500 CPUWatchpoint *wp;
1501
1502 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1503 if ((len & (len - 1)) || (addr & ~len_mask) ||
1504 len == 0 || len > TARGET_PAGE_SIZE) {
1505 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1506 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1507 return -EINVAL;
1508 }
1509 wp = g_malloc(sizeof(*wp));
1510
1511 wp->vaddr = addr;
1512 wp->len_mask = len_mask;
1513 wp->flags = flags;
1514
1515 /* keep all GDB-injected watchpoints in front */
1516 if (flags & BP_GDB)
1517 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1518 else
1519 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1520
1521 tlb_flush_page(env, addr);
1522
1523 if (watchpoint)
1524 *watchpoint = wp;
1525 return 0;
1526 }
1527
1528 /* Remove a specific watchpoint. */
1529 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1530 int flags)
1531 {
1532 target_ulong len_mask = ~(len - 1);
1533 CPUWatchpoint *wp;
1534
1535 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1536 if (addr == wp->vaddr && len_mask == wp->len_mask
1537 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1538 cpu_watchpoint_remove_by_ref(env, wp);
1539 return 0;
1540 }
1541 }
1542 return -ENOENT;
1543 }
1544
/* Remove a specific watchpoint by reference.  Unlinks it, refreshes
   the TLB entry for its page and frees the structure. */
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);

    tlb_flush_page(env, watchpoint->vaddr);

    g_free(watchpoint);
}
1554
/* Remove all matching watchpoints (those with any flag in 'mask' set).
   Uses the _SAFE iterator because entries are freed while walking. */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
        if (wp->flags & mask)
            cpu_watchpoint_remove_by_ref(env, wp);
    }
}
1565 #endif
1566
1567 /* Add a breakpoint. */
1568 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1569 CPUBreakpoint **breakpoint)
1570 {
1571 #if defined(TARGET_HAS_ICE)
1572 CPUBreakpoint *bp;
1573
1574 bp = g_malloc(sizeof(*bp));
1575
1576 bp->pc = pc;
1577 bp->flags = flags;
1578
1579 /* keep all GDB-injected breakpoints in front */
1580 if (flags & BP_GDB)
1581 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1582 else
1583 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1584
1585 breakpoint_invalidate(env, pc);
1586
1587 if (breakpoint)
1588 *breakpoint = bp;
1589 return 0;
1590 #else
1591 return -ENOSYS;
1592 #endif
1593 }
1594
1595 /* Remove a specific breakpoint. */
1596 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1597 {
1598 #if defined(TARGET_HAS_ICE)
1599 CPUBreakpoint *bp;
1600
1601 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1602 if (bp->pc == pc && bp->flags == flags) {
1603 cpu_breakpoint_remove_by_ref(env, bp);
1604 return 0;
1605 }
1606 }
1607 return -ENOENT;
1608 #else
1609 return -ENOSYS;
1610 #endif
1611 }
1612
/* Remove a specific breakpoint by reference.  Unlinks it, forces
   retranslation of the code at its PC, and frees the structure. */
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
{
#if defined(TARGET_HAS_ICE)
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);

    breakpoint_invalidate(env, breakpoint->pc);

    g_free(breakpoint);
#endif
}
1624
/* Remove all matching breakpoints (those with any flag in 'mask' set).
   Uses the _SAFE iterator because entries are freed while walking. */
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
        if (bp->flags & mask)
            cpu_breakpoint_remove_by_ref(env, bp);
    }
#endif
}
1637
1638 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1639 CPU loop after each instruction */
1640 void cpu_single_step(CPUArchState *env, int enabled)
1641 {
1642 #if defined(TARGET_HAS_ICE)
1643 if (env->singlestep_enabled != enabled) {
1644 env->singlestep_enabled = enabled;
1645 if (kvm_enabled())
1646 kvm_update_guest_debug(env, 0);
1647 else {
1648 /* must flush all the translated code to avoid inconsistencies */
1649 /* XXX: only flush what is necessary */
1650 tb_flush(env);
1651 }
1652 }
1653 #endif
1654 }
1655
/* enable or disable low levels log.  A non-zero log_flags lazily opens
   the log file; zero closes it. */
void cpu_set_log(int log_flags)
{
    loglevel = log_flags;
    if (loglevel && !logfile) {
        /* first open truncates, later re-opens append */
        logfile = fopen(logfilename, log_append ? "a" : "w");
        if (!logfile) {
            perror(logfilename);
            _exit(1);
        }
#if !defined(CONFIG_SOFTMMU)
        /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
        {
            static char logfile_buf[4096];
            setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
        }
#elif defined(_WIN32)
        /* Win32 doesn't support line-buffering, so use unbuffered output. */
        setvbuf(logfile, NULL, _IONBF, 0);
#else
        setvbuf(logfile, NULL, _IOLBF, 0);
#endif
        log_append = 1;
    }
    if (!loglevel && logfile) {
        fclose(logfile);
        logfile = NULL;
    }
}
1685
/* Change the log file name and re-open the log if it was active.
   NOTE(review): the previous logfilename is never freed -- presumably
   because the initial value may be a string literal; a small leak per
   call if invoked repeatedly — confirm before "fixing". */
void cpu_set_log_filename(const char *filename)
{
    logfilename = strdup(filename);
    if (logfile) {
        fclose(logfile);
        logfile = NULL;
    }
    /* re-open with the current log level (no-op when logging is off) */
    cpu_set_log(loglevel);
}
1695
/* Force the CPU out of the translated-code loop by unchaining the TB
   it is currently executing (and everything reachable from it). */
static void cpu_unlink_tb(CPUArchState *env)
{
    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
       problem and hope the cpu will stop of its own accord.  For userspace
       emulation this often isn't actually as bad as it sounds.  Often
       signals are used primarily to interrupt blocking syscalls.  */
    TranslationBlock *tb;
    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;

    spin_lock(&interrupt_lock);
    tb = env->current_tb;
    /* if the cpu is currently executing code, we must unlink it and
       all the potentially executing TB */
    if (tb) {
        env->current_tb = NULL;
        tb_reset_jump_recursive(tb);
    }
    spin_unlock(&interrupt_lock);
}
1715
1716 #ifndef CONFIG_USER_ONLY
/* mask must never be zero, except for A20 change call.
   Default TCG implementation of cpu_interrupt(): records the request
   and kicks the target CPU so it notices promptly. */
static void tcg_handle_interrupt(CPUArchState *env, int mask)
{
    int old_mask;

    old_mask = env->interrupt_request;
    env->interrupt_request |= mask;

    /*
     * If called from iothread context, wake the target cpu in
     * case its halted.
     */
    if (!qemu_cpu_is_self(env)) {
        qemu_cpu_kick(env);
        return;
    }

    if (use_icount) {
        /* force the instruction counter to expire immediately */
        env->icount_decr.u16.high = 0xffff;
        /* a *new* interrupt raised outside an I/O instruction breaks
           deterministic icount execution */
        if (!can_do_io(env)
            && (mask & ~old_mask) != 0) {
            cpu_abort(env, "Raised interrupt while not in I/O function");
        }
    } else {
        cpu_unlink_tb(env);
    }
}
1744
/* Interrupt delivery hook; defaults to the TCG implementation above. */
CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1746
1747 #else /* CONFIG_USER_ONLY */
1748
/* User-mode: record the interrupt request and kick the CPU out of the
   translated-code loop so it is seen. */
void cpu_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request |= mask;
    cpu_unlink_tb(env);
}
1754 #endif /* CONFIG_USER_ONLY */
1755
/* Clear the given pending interrupt request bits. */
void cpu_reset_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request &= ~mask;
}
1760
/* Request that the CPU leave its execution loop as soon as possible. */
void cpu_exit(CPUArchState *env)
{
    env->exit_request = 1;
    cpu_unlink_tb(env);
}
1766
/* Table mapping -d log option names to mask bits; terminated by a
   zero-mask sentinel entry (relied on by cpu_str_to_log_mask). */
const CPULogItem cpu_log_items[] = {
    { CPU_LOG_TB_OUT_ASM, "out_asm",
      "show generated host assembly code for each compiled TB" },
    { CPU_LOG_TB_IN_ASM, "in_asm",
      "show target assembly code for each compiled TB" },
    { CPU_LOG_TB_OP, "op",
      "show micro ops for each compiled TB" },
    { CPU_LOG_TB_OP_OPT, "op_opt",
      "show micro ops "
#ifdef TARGET_I386
      "before eflags optimization and "
#endif
      "after liveness analysis" },
    { CPU_LOG_INT, "int",
      "show interrupts/exceptions in short format" },
    { CPU_LOG_EXEC, "exec",
      "show trace before each executed TB (lots of logs)" },
    { CPU_LOG_TB_CPU, "cpu",
      "show CPU state before block translation" },
#ifdef TARGET_I386
    { CPU_LOG_PCALL, "pcall",
      "show protected mode far calls/returns/exceptions" },
    { CPU_LOG_RESET, "cpu_reset",
      "show CPU state before CPU resets" },
#endif
#ifdef DEBUG_IOPORT
    { CPU_LOG_IOPORT, "ioport",
      "show all i/o ports accesses" },
#endif
    { 0, NULL, NULL },
};
1798
/* return non-zero iff the first n bytes of s1 equal the whole of s2 */
static int cmp1(const char *s1, int n, const char *s2)
{
    if (strlen(s2) == n) {
        return memcmp(s1, s2, n) == 0;
    }
    return 0;
}
1805
1806 /* takes a comma separated list of log masks. Return 0 if error. */
1807 int cpu_str_to_log_mask(const char *str)
1808 {
1809 const CPULogItem *item;
1810 int mask;
1811 const char *p, *p1;
1812
1813 p = str;
1814 mask = 0;
1815 for(;;) {
1816 p1 = strchr(p, ',');
1817 if (!p1)
1818 p1 = p + strlen(p);
1819 if(cmp1(p,p1-p,"all")) {
1820 for(item = cpu_log_items; item->mask != 0; item++) {
1821 mask |= item->mask;
1822 }
1823 } else {
1824 for(item = cpu_log_items; item->mask != 0; item++) {
1825 if (cmp1(p, p1 - p, item->name))
1826 goto found;
1827 }
1828 return 0;
1829 }
1830 found:
1831 mask |= item->mask;
1832 if (*p1 != ',')
1833 break;
1834 p = p1 + 1;
1835 }
1836 return mask;
1837 }
1838
/* Report a fatal emulation error to stderr (and the log file, if any),
   dump CPU state, then abort().  Never returns. */
void cpu_abort(CPUArchState *env, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    /* the list is consumed twice: once for stderr, once for the log */
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
#ifdef TARGET_I386
    cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
#else
    cpu_dump_state(env, stderr, fprintf, 0);
#endif
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
#ifdef TARGET_I386
        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
#else
        log_cpu_state(env, 0);
#endif
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        /* restore the default SIGABRT handler so abort() really
           terminates even if the guest installed its own handler */
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}
1878
1879 CPUArchState *cpu_copy(CPUArchState *env)
1880 {
1881 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1882 CPUArchState *next_cpu = new_env->next_cpu;
1883 int cpu_index = new_env->cpu_index;
1884 #if defined(TARGET_HAS_ICE)
1885 CPUBreakpoint *bp;
1886 CPUWatchpoint *wp;
1887 #endif
1888
1889 memcpy(new_env, env, sizeof(CPUArchState));
1890
1891 /* Preserve chaining and index. */
1892 new_env->next_cpu = next_cpu;
1893 new_env->cpu_index = cpu_index;
1894
1895 /* Clone all break/watchpoints.
1896 Note: Once we support ptrace with hw-debug register access, make sure
1897 BP_CPU break/watchpoints are handled correctly on clone. */
1898 QTAILQ_INIT(&env->breakpoints);
1899 QTAILQ_INIT(&env->watchpoints);
1900 #if defined(TARGET_HAS_ICE)
1901 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1902 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1903 }
1904 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1905 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1906 wp->flags, NULL);
1907 }
1908 #endif
1909
1910 return new_env;
1911 }
1912
1913 #if !defined(CONFIG_USER_ONLY)
1914 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1915 {
1916 unsigned int i;
1917
1918 /* Discard jump cache entries for any tb which might potentially
1919 overlap the flushed page. */
1920 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1921 memset (&env->tb_jmp_cache[i], 0,
1922 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1923
1924 i = tb_jmp_cache_hash_page(addr);
1925 memset (&env->tb_jmp_cache[i], 0,
1926 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1927 }
1928
/* Note: start and end must be within the same ram block.
   Clears the given dirty flags for [start, end) and re-arms the TLB so
   future writes mark the range dirty again. */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                     int dirty_flags)
{
    uintptr_t length, start1;

    start &= TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    length = end - start;
    if (length == 0)
        return;
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);

    /* we modify the TLB cache so that the dirty bit will be set again
       when accessing the range */
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
    /* Check that we don't span multiple blocks - this breaks the
       address comparisons below.  */
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
            != (end - 1) - start) {
        abort();
    }
    cpu_tlb_reset_dirty_all(start1, length);
}
1954
1955 int cpu_physical_memory_set_dirty_tracking(int enable)
1956 {
1957 int ret = 0;
1958 in_migration = enable;
1959 return ret;
1960 }
1961
/* Compute the IOTLB value for a TLB entry mapping guest-virtual vaddr
   to physical paddr within 'section'.  For RAM the value is a ram_addr
   tagged with the notdirty/rom section; for MMIO it is a section index
   plus offset.  *address gets TLB_MMIO or'ed in when the page must trap
   (watchpoints). */
target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
                                                   MemoryRegionSection *section,
                                                   target_ulong vaddr,
                                                   target_phys_addr_t paddr,
                                                   int prot,
                                                   target_ulong *address)
{
    target_phys_addr_t iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, paddr);
        if (!section->readonly) {
            iotlb |= phys_section_notdirty;
        } else {
            iotlb |= phys_section_rom;
        }
    } else {
        /* IO handlers are currently passed a physical address.
           It would be nice to pass an offset from the base address
           of that region.  This would avoid having to special case RAM,
           and avoid full address decoding in every device.
           We can't use the high bits of pd for this because
           IO_MEM_ROMD uses these as a ram address.  */
        iotlb = section - phys_sections;
        iotlb += memory_region_section_addr(section, paddr);
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = phys_section_watch + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
2007
2008 #else
2009 /*
2010 * Walks guest process memory "regions" one by one
2011 * and calls callback function 'fn' for each region.
2012 */
2013
/* Accumulator state used while coalescing adjacent pages with equal
   protection into maximal regions. */
struct walk_memory_regions_data
{
    walk_memory_regions_fn fn;   /* callback invoked per finished region */
    void *priv;                  /* opaque argument forwarded to fn */
    uintptr_t start;             /* start of the open region, or -1ul if none */
    int prot;                    /* protection flags of the open region */
};
2021
2022 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2023 abi_ulong end, int new_prot)
2024 {
2025 if (data->start != -1ul) {
2026 int rc = data->fn(data->priv, data->start, end, data->prot);
2027 if (rc != 0) {
2028 return rc;
2029 }
2030 }
2031
2032 data->start = (new_prot ? end : -1ul);
2033 data->prot = new_prot;
2034
2035 return 0;
2036 }
2037
/* Recursively walk one node of the multi-level page table rooted at
   l1_map, reporting protection changes via walk_memory_regions_end.
   'base' is the guest address covered by *lp; 'level' 0 means *lp is a
   leaf PageDesc array.  Returns the first non-zero callback result. */
static int walk_memory_regions_1(struct walk_memory_regions_data *data,
                                 abi_ulong base, int level, void **lp)
{
    abi_ulong pa;
    int i, rc;

    if (*lp == NULL) {
        /* hole in the page table: close any open region */
        return walk_memory_regions_end(data, base, 0);
    }

    if (level == 0) {
        PageDesc *pd = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            int prot = pd[i].flags;

            pa = base | (i << TARGET_PAGE_BITS);
            if (prot != data->prot) {
                rc = walk_memory_regions_end(data, pa, prot);
                if (rc != 0) {
                    return rc;
                }
            }
        }
    } else {
        void **pp = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            pa = base | ((abi_ulong)i <<
                (TARGET_PAGE_BITS + L2_BITS * level));
            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
            if (rc != 0) {
                return rc;
            }
        }
    }

    return 0;
}
2075
/* Walk all mapped guest memory, calling 'fn' once per maximal run of
   pages sharing the same protection.  Returns the first non-zero value
   'fn' returns, or 0. */
int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
{
    struct walk_memory_regions_data data;
    uintptr_t i;

    data.fn = fn;
    data.priv = priv;
    data.start = -1ul;   /* no region open yet */
    data.prot = 0;

    for (i = 0; i < V_L1_SIZE; i++) {
        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
        if (rc != 0) {
            return rc;
        }
    }

    /* flush the final open region, if any */
    return walk_memory_regions_end(&data, 0, 0);
}
2096
2097 static int dump_region(void *priv, abi_ulong start,
2098 abi_ulong end, unsigned long prot)
2099 {
2100 FILE *f = (FILE *)priv;
2101
2102 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2103 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2104 start, end, end - start,
2105 ((prot & PAGE_READ) ? 'r' : '-'),
2106 ((prot & PAGE_WRITE) ? 'w' : '-'),
2107 ((prot & PAGE_EXEC) ? 'x' : '-'));
2108
2109 return (0);
2110 }
2111
/* dump memory mappings: one header line followed by one line per
   region (see dump_region). */
void page_dump(FILE *f)
{
    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
            "start", "end", "size", "prot");
    walk_memory_regions(f, dump_region);
}
2119
2120 int page_get_flags(target_ulong address)
2121 {
2122 PageDesc *p;
2123
2124 p = page_find(address >> TARGET_PAGE_BITS);
2125 if (!p)
2126 return 0;
2127 return p->flags;
2128 }
2129
/* Modify the flags of a page and invalidate the code if necessary.
   The flag PAGE_WRITE_ORG is positioned automatically depending
   on PAGE_WRITE.  The mmap_lock should already be held. */
void page_set_flags(target_ulong start, target_ulong end, int flags)
{
    target_ulong addr, len;

    /* This function should never be called with addresses outside the
       guest address space.  If this assert fires, it probably indicates
       a missing call to h2g_valid.  */
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
#endif
    assert(start < end);

    start = start & TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    if (flags & PAGE_WRITE) {
        /* remember the page was originally writable even if write
           protection is later applied for SMC detection */
        flags |= PAGE_WRITE_ORG;
    }

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);

        /* If the write protection bit is set, then we invalidate
           the code inside.  */
        if (!(p->flags & PAGE_WRITE) &&
            (flags & PAGE_WRITE) &&
            p->first_tb) {
            tb_invalidate_phys_page(addr, 0, NULL);
        }
        p->flags = flags;
    }
}
2167
2168 int page_check_range(target_ulong start, target_ulong len, int flags)
2169 {
2170 PageDesc *p;
2171 target_ulong end;
2172 target_ulong addr;
2173
2174 /* This function should never be called with addresses outside the
2175 guest address space. If this assert fires, it probably indicates
2176 a missing call to h2g_valid. */
2177 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2178 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2179 #endif
2180
2181 if (len == 0) {
2182 return 0;
2183 }
2184 if (start + len - 1 < start) {
2185 /* We've wrapped around. */
2186 return -1;
2187 }
2188
2189 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2190 start = start & TARGET_PAGE_MASK;
2191
2192 for (addr = start, len = end - start;
2193 len != 0;
2194 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2195 p = page_find(addr >> TARGET_PAGE_BITS);
2196 if( !p )
2197 return -1;
2198 if( !(p->flags & PAGE_VALID) )
2199 return -1;
2200
2201 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2202 return -1;
2203 if (flags & PAGE_WRITE) {
2204 if (!(p->flags & PAGE_WRITE_ORG))
2205 return -1;
2206 /* unprotect the page if it was put read-only because it
2207 contains translated code */
2208 if (!(p->flags & PAGE_WRITE)) {
2209 if (!page_unprotect(addr, 0, NULL))
2210 return -1;
2211 }
2212 return 0;
2213 }
2214 }
2215 return 0;
2216 }
2217
/* called from signal handler: invalidate the code and unprotect the
   page.  Return TRUE if the fault was successfully handled. */
int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
{
    unsigned int prot;
    PageDesc *p;
    target_ulong host_start, host_end, addr;

    /* Technically this isn't safe inside a signal handler.  However we
       know this only ever happens in a synchronous SEGV handler, so in
       practice it seems to be ok.  */
    mmap_lock();

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p) {
        mmap_unlock();
        return 0;
    }

    /* if the page was really writable, then we change its
       protection back to writable */
    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
        /* a host page may cover several target pages; restore the
           whole host page and drop all translated code on it */
        host_start = address & qemu_host_page_mask;
        host_end = host_start + qemu_host_page_size;

        prot = 0;
        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
            p = page_find(addr >> TARGET_PAGE_BITS);
            p->flags |= PAGE_WRITE;
            prot |= p->flags;

            /* and since the content will be modified, we must invalidate
               the corresponding translated code. */
            tb_invalidate_phys_page(addr, pc, puc);
#ifdef DEBUG_TB_CHECK
            tb_invalidate_check(addr);
#endif
        }
        mprotect((void *)g2h(host_start), qemu_host_page_size,
                 prot & PAGE_BITS);

        mmap_unlock();
        return 1;
    }
    mmap_unlock();
    return 0;
}
2265 #endif /* defined(CONFIG_USER_ONLY) */
2266
2267 #if !defined(CONFIG_USER_ONLY)
2268
/* Offset of an address within its target page (the index into a
   subpage_t's sub_section[] table). */
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/* A "subpage" splits one target page so that different
   MemoryRegionSections can back different byte ranges of it. */
typedef struct subpage_t {
    MemoryRegion iomem;        /* dispatch region covering the whole page */
    target_phys_addr_t base;   /* guest-physical base of the page */
    /* per-byte-offset index into phys_sections[] */
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(target_phys_addr_t base);
/* Release the subpage container (if any) referenced by a phys_sections
   slot; plain sections own nothing that needs freeing here. */
static void destroy_page_desc(uint16_t section_index)
{
    MemoryRegionSection *section = &phys_sections[section_index];
    MemoryRegion *mr = section->mr;

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        memory_region_destroy(&subpage->iomem);
        g_free(subpage);
    }
}
2290
2291 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2292 {
2293 unsigned i;
2294 PhysPageEntry *p;
2295
2296 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2297 return;
2298 }
2299
2300 p = phys_map_nodes[lp->ptr];
2301 for (i = 0; i < L2_SIZE; ++i) {
2302 if (!p[i].is_leaf) {
2303 destroy_l2_mapping(&p[i], level - 1);
2304 } else {
2305 destroy_page_desc(p[i].ptr);
2306 }
2307 }
2308 lp->is_leaf = 0;
2309 lp->ptr = PHYS_MAP_NODE_NIL;
2310 }
2311
/* Tear down the entire physical page table: free all subpage containers
   and reset the radix-tree node pool. */
static void destroy_all_mappings(void)
{
    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
    phys_map_nodes_reset();
}
2317
2318 static uint16_t phys_section_add(MemoryRegionSection *section)
2319 {
2320 if (phys_sections_nb == phys_sections_nb_alloc) {
2321 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2322 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2323 phys_sections_nb_alloc);
2324 }
2325 phys_sections[phys_sections_nb] = *section;
2326 return phys_sections_nb++;
2327 }
2328
/* Forget all registered sections.  The storage is kept for reuse; the
   subpage containers they referenced are freed by destroy_all_mappings(). */
static void phys_sections_clear(void)
{
    phys_sections_nb = 0;
}
2333
/* Register a physical memory section with the flat page table.
   For RAM, the section size must be a multiple of the target page size.
   Sections whose start or size is not page aligned are dispatched at
   sub-page granularity through a subpage_t container (register_subpage);
   page-aligned spans are entered directly (register_multipage).
   NOTE(review): earlier text here described the old phys_offset /
   region_offset registration API, which no longer exists in this path. */
2342 static void register_subpage(MemoryRegionSection *section)
2343 {
2344 subpage_t *subpage;
2345 target_phys_addr_t base = section->offset_within_address_space
2346 & TARGET_PAGE_MASK;
2347 MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2348 MemoryRegionSection subsection = {
2349 .offset_within_address_space = base,
2350 .size = TARGET_PAGE_SIZE,
2351 };
2352 target_phys_addr_t start, end;
2353
2354 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2355
2356 if (!(existing->mr->subpage)) {
2357 subpage = subpage_init(base);
2358 subsection.mr = &subpage->iomem;
2359 phys_page_set(base >> TARGET_PAGE_BITS, 1,
2360 phys_section_add(&subsection));
2361 } else {
2362 subpage = container_of(existing->mr, subpage_t, iomem);
2363 }
2364 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2365 end = start + section->size;
2366 subpage_register(subpage, start, end, phys_section_add(section));
2367 }
2368
2369
2370 static void register_multipage(MemoryRegionSection *section)
2371 {
2372 target_phys_addr_t start_addr = section->offset_within_address_space;
2373 ram_addr_t size = section->size;
2374 target_phys_addr_t addr;
2375 uint16_t section_index = phys_section_add(section);
2376
2377 assert(size);
2378
2379 addr = start_addr;
2380 phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2381 section_index);
2382 }
2383
/* Enter SECTION into the physical page table, splitting it into an
   unaligned head (subpage), a page-aligned middle (multipage) and an
   unaligned tail (subpage).
   NOTE(review): the readonly argument is currently unused here. */
void cpu_register_physical_memory_log(MemoryRegionSection *section,
                                      bool readonly)
{
    MemoryRegionSection now = *section, remain = *section;

    /* Head: up to the next page boundary, taken when the start is
       unaligned or the whole section is smaller than a page. */
    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
        || (now.size < TARGET_PAGE_SIZE)) {
        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space,
                       now.size);
        register_subpage(&now);
        remain.size -= now.size;
        remain.offset_within_address_space += now.size;
        remain.offset_within_region += now.size;
    }
    /* Middle: the run of whole pages, if any. */
    now = remain;
    now.size &= TARGET_PAGE_MASK;
    if (now.size) {
        register_multipage(&now);
        remain.size -= now.size;
        remain.offset_within_address_space += now.size;
        remain.offset_within_region += now.size;
    }
    /* Tail: any remaining partial page. */
    now = remain;
    if (now.size) {
        register_subpage(&now);
    }
}
2412
2413
2414 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2415 {
2416 if (kvm_enabled())
2417 kvm_coalesce_mmio_region(addr, size);
2418 }
2419
2420 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2421 {
2422 if (kvm_enabled())
2423 kvm_uncoalesce_mmio_region(addr, size);
2424 }
2425
/* Drain any MMIO writes KVM has buffered for coalesced regions. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2431
2432 #if defined(__linux__) && !defined(TARGET_S390X)
2433
2434 #include <sys/vfs.h>
2435
2436 #define HUGETLBFS_MAGIC 0x958458f6
2437
2438 static long gethugepagesize(const char *path)
2439 {
2440 struct statfs fs;
2441 int ret;
2442
2443 do {
2444 ret = statfs(path, &fs);
2445 } while (ret != 0 && errno == EINTR);
2446
2447 if (ret != 0) {
2448 perror(path);
2449 return 0;
2450 }
2451
2452 if (fs.f_type != HUGETLBFS_MAGIC)
2453 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2454
2455 return fs.f_bsize;
2456 }
2457
/* Allocate BLOCK's backing storage from a hugetlbfs mount at PATH.
   Returns the mapped area and records the fd in BLOCK, or NULL on any
   failure (the caller falls back to a normal allocation).  Linux-only. */
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    char *filename;
    void *area;
    int fd;
#ifdef MAP_POPULATE
    int flags;
#endif
    unsigned long hpagesize;

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
        return NULL;
    }

    /* Requests smaller than one huge page cannot be satisfied here. */
    if (memory < hpagesize) {
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
        return NULL;
    }

    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
        return NULL;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        free(filename);
        return NULL;
    }
    /* Unlink immediately: the open fd keeps the file alive, and the
       backing store disappears automatically when QEMU exits. */
    unlink(filename);
    free(filename);

    /* Round the size up to a whole number of huge pages. */
    memory = (memory+hpagesize-1) & ~(hpagesize-1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory))
        perror("ftruncate");

#ifdef MAP_POPULATE
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
     * to sidestep this quirk.
     */
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
#else
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
#endif
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
        return (NULL);
    }
    block->fd = fd;
    return area;
}
2526 #endif
2527
/* Best-fit search for a gap of at least SIZE bytes in the ram_addr_t
   space between the existing RAM blocks.  Returns the gap's start;
   aborts if no gap is large enough. */
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    if (QLIST_EMPTY(&ram_list.blocks))
        return 0;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->length;

        /* Find the start of the nearest block above this one; the space
           between them is a candidate gap. */
        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        /* Keep the smallest gap that still fits (best fit). */
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}
2560
2561 static ram_addr_t last_ram_offset(void)
2562 {
2563 RAMBlock *block;
2564 ram_addr_t last = 0;
2565
2566 QLIST_FOREACH(block, &ram_list.blocks, next)
2567 last = MAX(last, block->offset + block->length);
2568
2569 return last;
2570 }
2571
/* Assign the migration identifier ("<dev-path>/<name>") to the RAM
   block at ADDR.  Aborts if the block does not exist, is already named,
   or if the resulting idstr collides with another block's. */
void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    new_block = NULL;
    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            new_block = block;
            break;
        }
    }
    assert(new_block);
    assert(!new_block->idstr[0]);

    /* Prefix with the qdev path when available, disambiguating multiple
       instances of the same device model. */
    if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
        char *id = dev->parent_bus->info->get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    /* idstr must be unique: it keys the block during migration. */
    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
}
2603
/* Register a new RAM block of SIZE bytes for MR and return its offset
   in the ram_addr_t space.  If HOST is non-NULL the caller provides
   (and keeps ownership of) the backing memory; otherwise it is
   allocated here, honouring -mem-path, Xen, and the s390x/KVM address
   constraint. */
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr)
{
    RAMBlock *new_block;

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));

    new_block->mr = mr;
    new_block->offset = find_ram_offset(size);
    if (host) {
        new_block->host = host;
        /* caller-owned memory: qemu_ram_free() must not release it */
        new_block->flags |= RAM_PREALLOC_MASK;
    } else {
        if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
            new_block->host = file_ram_alloc(new_block, size, mem_path);
            if (!new_block->host) {
                /* hugetlbfs failed: fall back to a normal allocation */
                new_block->host = qemu_vmalloc(size);
                qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
            }
#else
            fprintf(stderr, "-mem-path option unsupported\n");
            exit(1);
#endif
        } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
            /* S390 KVM requires the topmost vma of the RAM to be smaller than
               an system defined value, which is at least 256GB. Larger systems
               have larger values. We put the guest between the end of data
               segment (system break) and this value. We use 32GB as a base to
               have enough room for the system break to grow. */
            new_block->host = mmap((void*)0x800000000, size,
                                   PROT_EXEC|PROT_READ|PROT_WRITE,
                                   MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
            if (new_block->host == MAP_FAILED) {
                fprintf(stderr, "Allocating RAM failed\n");
                abort();
            }
#else
            if (xen_enabled()) {
                /* Xen maps guest RAM lazily through its map cache;
                   new_block->host stays NULL until first access. */
                xen_ram_alloc(new_block->offset, size, mr);
            } else {
                new_block->host = qemu_vmalloc(size);
            }
#endif
            qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
        }
    }
    new_block->length = size;

    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);

    /* Grow the dirty bitmap and mark the new range fully dirty. */
    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                    last_ram_offset() >> TARGET_PAGE_BITS);
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
           0xff, size >> TARGET_PAGE_BITS);

    if (kvm_enabled())
        kvm_setup_guest_memory(new_block->host, size);

    return new_block->offset;
}
2667
/* Allocate a new RAM block of SIZE bytes for MR, with the backing
   memory allocated internally (see qemu_ram_alloc_from_ptr). */
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
{
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
}
2672
2673 void qemu_ram_free_from_ptr(ram_addr_t addr)
2674 {
2675 RAMBlock *block;
2676
2677 QLIST_FOREACH(block, &ram_list.blocks, next) {
2678 if (addr == block->offset) {
2679 QLIST_REMOVE(block, next);
2680 g_free(block);
2681 return;
2682 }
2683 }
2684 }
2685
/* Unregister the RAM block at ADDR and release its backing storage
   with whichever mechanism allocated it (hugetlbfs file, s390 mmap,
   Xen map cache, or qemu_vmalloc).  Unknown addresses are ignored. */
void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE(block, next);
            if (block->flags & RAM_PREALLOC_MASK) {
                /* caller-owned memory: nothing to free */
                ;
            } else if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
                if (block->fd) {
                    /* allocated by file_ram_alloc() */
                    munmap(block->host, block->length);
                    close(block->fd);
                } else {
                    /* file_ram_alloc() failed; it fell back to vmalloc */
                    qemu_vfree(block->host);
                }
#else
                abort();
#endif
            } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
                munmap(block->host, block->length);
#else
                if (xen_enabled()) {
                    xen_invalidate_map_cache_entry(block->host);
                } else {
                    qemu_vfree(block->host);
                }
#endif
            }
            g_free(block);
            return;
        }
    }

}
2723
2724 #ifndef _WIN32
/* Re-create the host mapping for a range of guest RAM in place (used
   e.g. to recover from a hardware memory error): the old pages are
   unmapped and fresh ones mapped MAP_FIXED at the same host address,
   using the same backing scheme the block was allocated with. */
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->length) {
            vaddr = block->host + offset;
            if (block->flags & RAM_PREALLOC_MASK) {
                /* caller-owned memory: we must not touch the mapping */
                ;
            } else {
                flags = MAP_FIXED;
                munmap(vaddr, length);
                if (mem_path) {
#if defined(__linux__) && !defined(TARGET_S390X)
                    if (block->fd) {
                        /* remap from the hugetlbfs backing file */
#ifdef MAP_POPULATE
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
                            MAP_PRIVATE;
#else
                        flags |= MAP_PRIVATE;
#endif
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                    flags, block->fd, offset);
                    } else {
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                    flags, -1, 0);
                    }
#else
                    abort();
#endif
                } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
                                flags, -1, 0);
#else
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
#endif
                }
                /* MAP_FIXED must land exactly on vaddr, or guest RAM
                   would silently move; treat anything else as fatal. */
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
            }
            return;
        }
    }
}
2783 #endif /* !_WIN32 */
2784
/* Return a host pointer to ram allocated with qemu_ram_alloc.
   With the exception of the softmmu code in this file, this should
   only be used for local memory (e.g. video ram) that the device owns,
   and knows it isn't going to access beyond the end of the block.

   It should not be used for general purpose DMA.
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            /* Move this entry to the start of the list: lookups tend to
               repeat on the same block, so keep it an MRU list. */
            if (block != QLIST_FIRST(&ram_list.blocks)) {
                QLIST_REMOVE(block, next);
                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
            }
            if (xen_enabled()) {
                /* We need to check if the requested address is in the RAM
                 * because we don't want to map the entire memory in QEMU.
                 * In that case just map until the end of the page.
                 */
                if (block->offset == 0) {
                    return xen_map_cache(addr, 0, 0);
                } else if (block->host == NULL) {
                    /* lazily map the whole block through the map cache */
                    block->host =
                        xen_map_cache(block->offset, block->length, 1);
                }
            }
            return block->host + (addr - block->offset);
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

    return NULL;
}
2825
/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * Same as qemu_get_ram_ptr but avoid reordering ramblocks, so it is
 * safe while another thread may be iterating the block list.
 */
void *qemu_safe_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            if (xen_enabled()) {
                /* We need to check if the requested address is in the RAM
                 * because we don't want to map the entire memory in QEMU.
                 * In that case just map until the end of the page.
                 */
                if (block->offset == 0) {
                    return xen_map_cache(addr, 0, 0);
                } else if (block->host == NULL) {
                    /* lazily map the whole block through the map cache */
                    block->host =
                        xen_map_cache(block->offset, block->length, 1);
                }
            }
            return block->host + (addr - block->offset);
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

    return NULL;
}
2856
2857 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2858 * but takes a size argument */
2859 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2860 {
2861 if (*size == 0) {
2862 return NULL;
2863 }
2864 if (xen_enabled()) {
2865 return xen_map_cache(addr, *size, 1);
2866 } else {
2867 RAMBlock *block;
2868
2869 QLIST_FOREACH(block, &ram_list.blocks, next) {
2870 if (addr - block->offset < block->length) {
2871 if (addr - block->offset + *size > block->length)
2872 *size = block->length - addr + block->offset;
2873 return block->host + (addr - block->offset);
2874 }
2875 }
2876
2877 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2878 abort();
2879 }
2880 }
2881
/* Release a pointer obtained from qemu_get_ram_ptr().  Currently only
   emits a trace event; no unmapping is performed here. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
2886
/* Translate a host pointer inside some RAM block back to its
   ram_addr_t.  Returns 0 and fills *ram_addr on success, -1 if PTR is
   not inside any mapped block. */
int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        return 0;
    }

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        /* This can happen when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->length) {
            *ram_addr = block->offset + (host - block->host);
            return 0;
        }
    }

    return -1;
}
2910
2911 /* Some of the softmmu routines need to translate from a host pointer
2912 (typically a TLB entry) back to a ram offset. */
2913 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2914 {
2915 ram_addr_t ram_addr;
2916
2917 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2918 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2919 abort();
2920 }
2921 return ram_addr;
2922 }
2923
/* Read from an address with nothing mapped: notify the CPU on targets
   that model bus faults, then return 0. */
static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
                                    unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
#endif
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
#endif
    return 0;
}

/* Write to an address with nothing mapped: notify and discard. */
static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
                                 uint64_t val, unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
#endif
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
#endif
}
2946
static const MemoryRegionOps unassigned_mem_ops = {
    .read = unassigned_mem_read,
    .write = unassigned_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

/* Accessors for regions that must never be dispatched as I/O (their
   accesses go through the RAM fast path instead): reaching them is a
   bug, so abort. */
static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
                               unsigned size)
{
    abort();
}

static void error_mem_write(void *opaque, target_phys_addr_t addr,
                            uint64_t value, unsigned size)
{
    abort();
}

static const MemoryRegionOps error_mem_ops = {
    .read = error_mem_read,
    .write = error_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

/* ROM: reads use the RAM fast path (never dispatched here); writes are
   ignored like writes to unassigned memory. */
static const MemoryRegionOps rom_mem_ops = {
    .read = error_mem_read,
    .write = unassigned_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2976
/* Write handler installed for RAM pages whose dirty state is being
   tracked (e.g. pages containing translated code): perform the store,
   invalidate affected TBs, and update the dirty bitmap. */
static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
                               uint64_t val, unsigned size)
{
    int dirty_flags;
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
#if !defined(CONFIG_USER_ONLY)
        /* The page holds translated code: drop the affected TBs, then
           re-read the flags, which the invalidation may have changed. */
        tb_invalidate_phys_page_fast(ram_addr, size);
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
#endif
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    /* Set every dirty bit except CODE. */
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (dirty_flags == 0xff)
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
}

static const MemoryRegionOps notdirty_mem_ops = {
    .read = error_mem_read,   /* reads never dispatch through this region */
    .write = notdirty_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
3014
/* Generate a debug exception if a watchpoint has been hit.
   OFFSET is the access offset within the current I/O page, LEN_MASK
   masks the access length, FLAGS selects BP_MEM_READ/BP_MEM_WRITE. */
static void check_watchpoint(int offset, int len_mask, int flags)
{
    CPUArchState *env = cpu_single_env;
    target_ulong pc, cs_base;
    TranslationBlock *tb;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (env->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
            wp->flags |= BP_WATCHPOINT_HIT;
            if (!env->watchpoint_hit) {
                env->watchpoint_hit = wp;
                tb = tb_find_pc(env->mem_io_pc);
                if (!tb) {
                    cpu_abort(env, "check_watchpoint: could not find TB for "
                              "pc=%p", (void *)env->mem_io_pc);
                }
                /* Recover the precise guest CPU state at the faulting
                   instruction before deciding how to report the hit. */
                cpu_restore_state(tb, env, env->mem_io_pc);
                tb_phys_invalidate(tb, -1);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    env->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(env);
                } else {
                    /* Re-translate a single instruction and resume, so
                       the access completes before the debug exception
                       is raised (see the re-entry branch above). */
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(env, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}
3060
/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines. */
static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
                               unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
    switch (size) {
    case 1: return ldub_phys(addr);
    case 2: return lduw_phys(addr);
    case 4: return ldl_phys(addr);
    default: abort();
    }
}

/* As watch_mem_read, but for stores. */
static void watch_mem_write(void *opaque, target_phys_addr_t addr,
                            uint64_t val, unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
    switch (size) {
    case 1:
        stb_phys(addr, val);
        break;
    case 2:
        stw_phys(addr, val);
        break;
    case 4:
        stl_phys(addr, val);
        break;
    default: abort();
    }
}

static const MemoryRegionOps watch_mem_ops = {
    .read = watch_mem_read,
    .write = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
3099
/* Second-level dispatch for a sub-page-granularity page: look up the
   section for this byte offset and forward the read to its region,
   rebased to region-relative coordinates. */
static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
                             unsigned len)
{
    subpage_t *mmio = opaque;
    unsigned int idx = SUBPAGE_IDX(addr);
    MemoryRegionSection *section;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
           mmio, len, addr, idx);
#endif

    section = &phys_sections[mmio->sub_section[idx]];
    /* page offset -> absolute -> region-relative address */
    addr += mmio->base;
    addr -= section->offset_within_address_space;
    addr += section->offset_within_region;
    return io_mem_read(section->mr, addr, len);
}

/* As subpage_read, but forwarding a store. */
static void subpage_write(void *opaque, target_phys_addr_t addr,
                          uint64_t value, unsigned len)
{
    subpage_t *mmio = opaque;
    unsigned int idx = SUBPAGE_IDX(addr);
    MemoryRegionSection *section;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
           " idx %d value %"PRIx64"\n",
           __func__, mmio, len, addr, idx, value);
#endif

    section = &phys_sections[mmio->sub_section[idx]];
    /* page offset -> absolute -> region-relative address */
    addr += mmio->base;
    addr -= section->offset_within_address_space;
    addr += section->offset_within_region;
    io_mem_write(section->mr, addr, value, len);
}

static const MemoryRegionOps subpage_ops = {
    .read = subpage_read,
    .write = subpage_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
3142
3143 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3144 unsigned size)
3145 {
3146 ram_addr_t raddr = addr;
3147 void *ptr = qemu_get_ram_ptr(raddr);
3148 switch (size) {
3149 case 1: return ldub_p(ptr);
3150 case 2: return lduw_p(ptr);
3151 case 4: return ldl_p(ptr);
3152 default: abort();
3153 }
3154 }
3155
3156 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3157 uint64_t value, unsigned size)
3158 {
3159 ram_addr_t raddr = addr;
3160 void *ptr = qemu_get_ram_ptr(raddr);
3161 switch (size) {
3162 case 1: return stb_p(ptr, value);
3163 case 2: return stw_p(ptr, value);
3164 case 4: return stl_p(ptr, value);
3165 default: abort();
3166 }
3167 }
3168
/* Accessors used when a RAM section must be reached via subpage
   dispatch instead of the direct RAM fast path. */
static const MemoryRegionOps subpage_ram_ops = {
    .read = subpage_ram_read,
    .write = subpage_ram_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
3174
3175 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3176 uint16_t section)
3177 {
3178 int idx, eidx;
3179
3180 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3181 return -1;
3182 idx = SUBPAGE_IDX(start);
3183 eidx = SUBPAGE_IDX(end);
3184 #if defined(DEBUG_SUBPAGE)
3185 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3186 mmio, start, end, idx, eidx, memory);
3187 #endif
3188 if (memory_region_is_ram(phys_sections[section].mr)) {
3189 MemoryRegionSection new_section = phys_sections[section];
3190 new_section.mr = &io_mem_subpage_ram;
3191 section = phys_section_add(&new_section);
3192 }
3193 for (; idx <= eidx; idx++) {
3194 mmio->sub_section[idx] = section;
3195 }
3196
3197 return 0;
3198 }
3199
3200 static subpage_t *subpage_init(target_phys_addr_t base)
3201 {
3202 subpage_t *mmio;
3203
3204 mmio = g_malloc0(sizeof(subpage_t));
3205
3206 mmio->base = base;
3207 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3208 "subpage", TARGET_PAGE_SIZE);
3209 mmio->iomem.subpage = true;
3210 #if defined(DEBUG_SUBPAGE)
3211 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3212 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3213 #endif
3214 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3215
3216 return mmio;
3217 }
3218
3219 static uint16_t dummy_section(MemoryRegion *mr)
3220 {
3221 MemoryRegionSection section = {
3222 .mr = mr,
3223 .offset_within_address_space = 0,
3224 .offset_within_region = 0,
3225 .size = UINT64_MAX,
3226 };
3227
3228 return phys_section_add(&section);
3229 }
3230
/* Map an iotlb entry (section index stored in the sub-page bits) back
   to the MemoryRegion it dispatches to. */
MemoryRegion *iotlb_to_region(target_phys_addr_t index)
{
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
}
3235
/* Create the fixed MemoryRegions used internally by the dispatch code
   (RAM/ROM fast-path markers, unassigned, dirty tracking, subpage RAM
   and watchpoint regions). */
static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
    memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
                          "unassigned", UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
                          "notdirty", UINT64_MAX);
    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
                          "subpage-ram", UINT64_MAX);
    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
                          "watch", UINT64_MAX);
}
3249
/* Start of a memory topology update: throw away the whole dispatch
   page table and re-register the fixed well-known sections.  The
   region_add/region_nop callbacks then rebuild it from scratch. */
static void core_begin(MemoryListener *listener)
{
    destroy_all_mappings();
    phys_sections_clear();
    phys_map.ptr = PHYS_MAP_NODE_NIL;
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
    phys_section_rom = dummy_section(&io_mem_rom);
    phys_section_watch = dummy_section(&io_mem_watch);
}
3260
/* End of a memory topology update: the dispatch table has changed, so
   cached translations must be dropped. */
static void core_commit(MemoryListener *listener)
{
    CPUArchState *env;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        tlb_flush(env, 1);
    }
}
3272
/* New section in the topology: enter it into the dispatch table. */
static void core_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    cpu_register_physical_memory_log(section, section->readonly);
}

/* Removal needs no action: core_begin() rebuilds the table from
   scratch on every update. */
static void core_region_del(MemoryListener *listener,
                            MemoryRegionSection *section)
{
}

/* Unchanged section: still re-registered, because the table was wiped
   by core_begin(). */
static void core_region_nop(MemoryListener *listener,
                            MemoryRegionSection *section)
{
    cpu_register_physical_memory_log(section, section->readonly);
}
3289
/* Per-section dirty logging is not handled by the core listener. */
static void core_log_start(MemoryListener *listener,
                           MemoryRegionSection *section)
{
}

static void core_log_stop(MemoryListener *listener,
                          MemoryRegionSection *section)
{
}

static void core_log_sync(MemoryListener *listener,
                          MemoryRegionSection *section)
{
}

/* Global dirty logging toggles the dirty-tracking machinery. */
static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}

/* ioeventfd is not handled by the core listener. */
static void core_eventfd_add(MemoryListener *listener,
                             MemoryRegionSection *section,
                             bool match_data, uint64_t data, int fd)
{
}

static void core_eventfd_del(MemoryListener *listener,
                             MemoryRegionSection *section,
                             bool match_data, uint64_t data, int fd)
{
}
3326
/* Listener for the I/O (port) address space: mirrors sections into the
   legacy ioport dispatch table.  Only region add/del need work. */
static void io_begin(MemoryListener *listener)
{
}

static void io_commit(MemoryListener *listener)
{
}

/* Wrap the section's MemoryRegion in an IORange and register it with
   the legacy ioport layer. */
static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);

    mrio->mr = section->mr;
    mrio->offset = section->offset_within_region;
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
                 section->offset_within_address_space, section->size);
    ioport_register(&mrio->iorange);
}

static void io_region_del(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    isa_unassign_ioport(section->offset_within_address_space, section->size);
}

/* Remaining callbacks are unused for the I/O space. */
static void io_region_nop(MemoryListener *listener,
                          MemoryRegionSection *section)
{
}

static void io_log_start(MemoryListener *listener,
                         MemoryRegionSection *section)
{
}

static void io_log_stop(MemoryListener *listener,
                        MemoryRegionSection *section)
{
}

static void io_log_sync(MemoryListener *listener,
                        MemoryRegionSection *section)
{
}

static void io_log_global_start(MemoryListener *listener)
{
}

static void io_log_global_stop(MemoryListener *listener)
{
}

static void io_eventfd_add(MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool match_data, uint64_t data, int fd)
{
}

static void io_eventfd_del(MemoryListener *listener,
                           MemoryRegionSection *section,
                           bool match_data, uint64_t data, int fd)
{
}
3392
/* Core listener on the system address space: keeps the flat physical
 * memory map and the global dirty-tracking state in sync with memory
 * API topology changes. */
static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .commit = core_commit,
    .region_add = core_region_add,
    .region_del = core_region_del,
    .region_nop = core_region_nop,
    .log_start = core_log_start,
    .log_stop = core_log_stop,
    .log_sync = core_log_sync,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .eventfd_add = core_eventfd_add,
    .eventfd_del = core_eventfd_del,
    .priority = 0,
};
3408
/* Listener on the I/O address space: forwards region changes to the
 * legacy ioport registration layer (io_region_add/io_region_del). */
static MemoryListener io_memory_listener = {
    .begin = io_begin,
    .commit = io_commit,
    .region_add = io_region_add,
    .region_del = io_region_del,
    .region_nop = io_region_nop,
    .log_start = io_log_start,
    .log_stop = io_log_stop,
    .log_sync = io_log_sync,
    .log_global_start = io_log_global_start,
    .log_global_stop = io_log_global_stop,
    .eventfd_add = io_eventfd_add,
    .eventfd_del = io_eventfd_del,
    .priority = 0,
};
3424
/* Create the two root address spaces ("system" memory covering the full
 * 64-bit range, and a 64KB "io" space) and attach the listeners that keep
 * the flat map and the ioport tables up to date. Called once at startup;
 * g_malloc aborts on OOM. */
static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    set_system_memory_map(system_memory);

    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    set_system_io_map(system_io);

    /* Register listeners only after both root regions exist. */
    memory_listener_register(&core_memory_listener, system_memory);
    memory_listener_register(&io_memory_listener, system_io);
}
3438
/* Accessor for the root system memory region created by memory_map_init().
 * Returned pointer is owned by this file; callers must not free it. */
MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

/* Accessor for the root I/O address space region. */
MemoryRegion *get_system_io(void)
{
    return system_io;
}
3448
3449 #endif /* !defined(CONFIG_USER_ONLY) */
3450
3451 /* physical memory access (slow version, mainly for debug) */
3452 #if defined(CONFIG_USER_ONLY)
3453 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3454 uint8_t *buf, int len, int is_write)
3455 {
3456 int l, flags;
3457 target_ulong page;
3458 void * p;
3459
3460 while (len > 0) {
3461 page = addr & TARGET_PAGE_MASK;
3462 l = (page + TARGET_PAGE_SIZE) - addr;
3463 if (l > len)
3464 l = len;
3465 flags = page_get_flags(page);
3466 if (!(flags & PAGE_VALID))
3467 return -1;
3468 if (is_write) {
3469 if (!(flags & PAGE_WRITE))
3470 return -1;
3471 /* XXX: this code should not depend on lock_user */
3472 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3473 return -1;
3474 memcpy(p, buf, l);
3475 unlock_user(p, addr, l);
3476 } else {
3477 if (!(flags & PAGE_READ))
3478 return -1;
3479 /* XXX: this code should not depend on lock_user */
3480 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3481 return -1;
3482 memcpy(buf, p, l);
3483 unlock_user(p, addr, 0);
3484 }
3485 len -= l;
3486 buf += l;
3487 addr += l;
3488 }
3489 return 0;
3490 }
3491
3492 #else
/* Copy @len bytes between @buf and guest physical address @addr,
 * splitting the transfer at page boundaries.  RAM pages are accessed via
 * memcpy; MMIO regions are accessed through io_mem_read/io_mem_write in
 * the widest naturally aligned unit available (4, 2, then 1 byte).
 * Writes to RAM invalidate overlapping translated code and update the
 * dirty bitmap.  Writes to read-only RAM and to holes (neither RAM nor
 * handled MMIO write path) are silently dropped. */
void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                            int len, int is_write)
{
    int l;
    uint8_t *ptr;
    uint32_t val;
    target_phys_addr_t page;
    MemoryRegionSection *section;

    while (len > 0) {
        /* Clamp this chunk so it does not cross a page boundary. */
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        if (is_write) {
            if (!memory_region_is_ram(section->mr)) {
                target_phys_addr_t addr1;
                addr1 = memory_region_section_addr(section, addr);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    io_mem_write(section->mr, addr1, val, 4);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    io_mem_write(section->mr, addr1, val, 2);
                    l = 2;
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    io_mem_write(section->mr, addr1, val, 1);
                    l = 1;
                }
            } else if (!section->readonly) {
                ram_addr_t addr1;
                addr1 = memory_region_get_ram_addr(section->mr)
                    + memory_region_section_addr(section, addr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                if (!cpu_physical_memory_is_dirty(addr1)) {
                    /* invalidate code */
                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
                    /* set dirty bit (all but CODE_DIRTY, which
                       tb_invalidate has just re-established) */
                    cpu_physical_memory_set_dirty_flags(
                        addr1, (0xff & ~CODE_DIRTY_FLAG));
                }
                qemu_put_ram_ptr(ptr);
            }
            /* else: write to read-only RAM is silently ignored */
        } else {
            if (!(memory_region_is_ram(section->mr) ||
                  memory_region_is_romd(section->mr))) {
                target_phys_addr_t addr1;
                /* I/O case */
                addr1 = memory_region_section_addr(section, addr);
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit read access */
                    val = io_mem_read(section->mr, addr1, 4);
                    stl_p(buf, val);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit read access */
                    val = io_mem_read(section->mr, addr1, 2);
                    stw_p(buf, val);
                    l = 2;
                } else {
                    /* 8 bit read access */
                    val = io_mem_read(section->mr, addr1, 1);
                    stb_p(buf, val);
                    l = 1;
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(section->mr->ram_addr
                                       + memory_region_section_addr(section,
                                                                    addr));
                memcpy(buf, ptr, l);
                qemu_put_ram_ptr(ptr);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
}
3583
3584 /* used for ROM loading : can write in RAM and ROM */
3585 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3586 const uint8_t *buf, int len)
3587 {
3588 int l;
3589 uint8_t *ptr;
3590 target_phys_addr_t page;
3591 MemoryRegionSection *section;
3592
3593 while (len > 0) {
3594 page = addr & TARGET_PAGE_MASK;
3595 l = (page + TARGET_PAGE_SIZE) - addr;
3596 if (l > len)
3597 l = len;
3598 section = phys_page_find(page >> TARGET_PAGE_BITS);
3599
3600 if (!(memory_region_is_ram(section->mr) ||
3601 memory_region_is_romd(section->mr))) {
3602 /* do nothing */
3603 } else {
3604 unsigned long addr1;
3605 addr1 = memory_region_get_ram_addr(section->mr)
3606 + memory_region_section_addr(section, addr);
3607 /* ROM/RAM case */
3608 ptr = qemu_get_ram_ptr(addr1);
3609 memcpy(ptr, buf, l);
3610 qemu_put_ram_ptr(ptr);
3611 }
3612 len -= l;
3613 buf += l;
3614 addr += l;
3615 }
3616 }
3617
/* Temporary buffer used by cpu_physical_memory_map() when the target
 * range is not directly mappable RAM.  Only one bounce buffer exists at
 * a time: buffer == NULL means it is free. */
typedef struct {
    void *buffer;            /* host allocation, NULL when not in use */
    target_phys_addr_t addr; /* guest physical address being bounced */
    target_phys_addr_t len;  /* length of the bounced range */
} BounceBuffer;

static BounceBuffer bounce;
3625
/* Registration record for callers waiting for the bounce buffer to be
 * released so they can retry cpu_physical_memory_map(). */
typedef struct MapClient {
    void *opaque;                    /* passed back to the callback */
    void (*callback)(void *opaque);  /* invoked when mapping may succeed */
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);
3634
3635 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3636 {
3637 MapClient *client = g_malloc(sizeof(*client));
3638
3639 client->opaque = opaque;
3640 client->callback = callback;
3641 QLIST_INSERT_HEAD(&map_client_list, client, link);
3642 return client;
3643 }
3644
3645 void cpu_unregister_map_client(void *_client)
3646 {
3647 MapClient *client = (MapClient *)_client;
3648
3649 QLIST_REMOVE(client, link);
3650 g_free(client);
3651 }
3652
3653 static void cpu_notify_map_clients(void)
3654 {
3655 MapClient *client;
3656
3657 while (!QLIST_EMPTY(&map_client_list)) {
3658 client = QLIST_FIRST(&map_client_list);
3659 client->callback(client->opaque);
3660 cpu_unregister_map_client(client);
3661 }
3662 }
3663
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *cpu_physical_memory_map(target_phys_addr_t addr,
                              target_phys_addr_t *plen,
                              int is_write)
{
    target_phys_addr_t len = *plen;
    target_phys_addr_t todo = 0;   /* bytes of directly-mappable RAM found */
    int l;
    target_phys_addr_t page;
    MemoryRegionSection *section;
    ram_addr_t raddr = RAM_ADDR_MAX; /* ram addr of first mappable page */
    ram_addr_t rlen;
    void *ret;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(page >> TARGET_PAGE_BITS);

        /* Anything that is not writable RAM goes through the single,
           page-sized bounce buffer instead of being mapped directly. */
        if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
            if (todo || bounce.buffer) {
                /* Either we already have direct RAM to return, or the
                   bounce buffer is busy: stop here. */
                break;
            }
            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
            bounce.addr = addr;
            bounce.len = l;
            if (!is_write) {
                /* Pre-fill the bounce buffer for read mappings. */
                cpu_physical_memory_read(addr, bounce.buffer, l);
            }

            *plen = l;
            return bounce.buffer;
        }
        if (!todo) {
            /* Remember where the contiguous RAM run starts. */
            raddr = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
        }

        len -= l;
        addr += l;
        todo += l;
    }
    /* Direct RAM case: hand back a host pointer covering as much of the
       run as qemu_ram_ptr_length can provide. */
    rlen = todo;
    ret = qemu_ram_ptr_length(raddr, &rlen);
    *plen = rlen;
    return ret;
}
3719
/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
 * Will also mark the memory as dirty if is_write == 1. access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
                               int is_write, target_phys_addr_t access_len)
{
    if (buffer != bounce.buffer) {
        /* Directly mapped RAM case. */
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            /* Invalidate code / set dirty bits one page at a time. */
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                if (!cpu_physical_memory_is_dirty(addr1)) {
                    /* invalidate code */
                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
                    /* set dirty bit */
                    cpu_physical_memory_set_dirty_flags(
                        addr1, (0xff & ~CODE_DIRTY_FLAG));
                }
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    /* Bounce buffer case: flush the written data back to guest memory,
       release the buffer and wake up anyone waiting for it. */
    if (is_write) {
        cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}
3758
/* warning: addr must be aligned */
/* Load a 32-bit value from guest physical memory with the requested
 * device endianness.  RAM/ROM-device reads go through a host pointer;
 * MMIO reads go through io_mem_read and are byte-swapped when the
 * device endianness differs from the target's. */
static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint32_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case: io_mem_read returns a target-endian value */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 4);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default: /* DEVICE_NATIVE_ENDIAN */
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}
3802
/* 32-bit physical loads in target-native, little and big endianness. */
uint32_t ldl_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
3817
/* warning: addr must be aligned */
/* Load a 64-bit value from guest physical memory.  The MMIO path splits
 * the access into two 4-byte io_mem_read calls and, as the XXX below
 * notes, ignores @endian there — it is only honored on the RAM path. */
static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);

        /* XXX This is broken when device endian != cpu endian.
           Fix and add "endian" variable check */
#ifdef TARGET_WORDS_BIGENDIAN
        val = io_mem_read(section->mr, addr, 4) << 32;
        val |= io_mem_read(section->mr, addr + 4, 4);
#else
        val = io_mem_read(section->mr, addr, 4);
        val |= io_mem_read(section->mr, addr + 4, 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default: /* DEVICE_NATIVE_ENDIAN */
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}
3861
/* 64-bit physical loads in target-native, little and big endianness.
 * Note: on MMIO, endianness handling is incomplete (see
 * ldq_phys_internal). */
uint64_t ldq_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
3876
/* XXX: optimize */
/* Load a single byte from guest physical memory (no endianness issue). */
uint32_t ldub_phys(target_phys_addr_t addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}
3884
3885 /* warning: addr must be aligned */
3886 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
3887 enum device_endian endian)
3888 {
3889 uint8_t *ptr;
3890 uint64_t val;
3891 MemoryRegionSection *section;
3892
3893 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3894
3895 if (!(memory_region_is_ram(section->mr) ||
3896 memory_region_is_romd(section->mr))) {
3897 /* I/O case */
3898 addr = memory_region_section_addr(section, addr);
3899 val = io_mem_read(section->mr, addr, 2);
3900 #if defined(TARGET_WORDS_BIGENDIAN)
3901 if (endian == DEVICE_LITTLE_ENDIAN) {
3902 val = bswap16(val);
3903 }
3904 #else
3905 if (endian == DEVICE_BIG_ENDIAN) {
3906 val = bswap16(val);
3907 }
3908 #endif
3909 } else {
3910 /* RAM case */
3911 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3912 & TARGET_PAGE_MASK)
3913 + memory_region_section_addr(section, addr));
3914 switch (endian) {
3915 case DEVICE_LITTLE_ENDIAN:
3916 val = lduw_le_p(ptr);
3917 break;
3918 case DEVICE_BIG_ENDIAN:
3919 val = lduw_be_p(ptr);
3920 break;
3921 default:
3922 val = lduw_p(ptr);
3923 break;
3924 }
3925 }
3926 return val;
3927 }
3928
/* 16-bit physical loads in target-native, little and big endianness. */
uint32_t lduw_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
3943
3944 /* warning: addr must be aligned. The ram page is not masked as dirty
3945 and the code inside is not invalidated. It is useful if the dirty
3946 bits are used to track modified PTEs */
3947 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3948 {
3949 uint8_t *ptr;
3950 MemoryRegionSection *section;
3951
3952 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3953
3954 if (!memory_region_is_ram(section->mr) || section->readonly) {
3955 addr = memory_region_section_addr(section, addr);
3956 if (memory_region_is_ram(section->mr)) {
3957 section = &phys_sections[phys_section_rom];
3958 }
3959 io_mem_write(section->mr, addr, val, 4);
3960 } else {
3961 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3962 & TARGET_PAGE_MASK)
3963 + memory_region_section_addr(section, addr);
3964 ptr = qemu_get_ram_ptr(addr1);
3965 stl_p(ptr, val);
3966
3967 if (unlikely(in_migration)) {
3968 if (!cpu_physical_memory_is_dirty(addr1)) {
3969 /* invalidate code */
3970 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3971 /* set dirty bit */
3972 cpu_physical_memory_set_dirty_flags(
3973 addr1, (0xff & ~CODE_DIRTY_FLAG));
3974 }
3975 }
3976 }
3977 }
3978
/* 64-bit variant of stl_phys_notdirty: store without dirty-bit/code
 * invalidation handling.  addr must be aligned.  The MMIO path splits
 * the store into two 4-byte writes in target word order. */
void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            /* read-only RAM: route the write through the ROM handler */
            section = &phys_sections[phys_section_rom];
        }
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write(section->mr, addr, val >> 32, 4);
        io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
#else
        io_mem_write(section->mr, addr, (uint32_t)val, 4);
        io_mem_write(section->mr, addr + 4, val >> 32, 4);
#endif
    } else {
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        stq_p(ptr, val);
    }
}
4005
4006 /* warning: addr must be aligned */
4007 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4008 enum device_endian endian)
4009 {
4010 uint8_t *ptr;
4011 MemoryRegionSection *section;
4012
4013 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4014
4015 if (!memory_region_is_ram(section->mr) || section->readonly) {
4016 addr = memory_region_section_addr(section, addr);
4017 if (memory_region_is_ram(section->mr)) {
4018 section = &phys_sections[phys_section_rom];
4019 }
4020 #if defined(TARGET_WORDS_BIGENDIAN)
4021 if (endian == DEVICE_LITTLE_ENDIAN) {
4022 val = bswap32(val);
4023 }
4024 #else
4025 if (endian == DEVICE_BIG_ENDIAN) {
4026 val = bswap32(val);
4027 }
4028 #endif
4029 io_mem_write(section->mr, addr, val, 4);
4030 } else {
4031 unsigned long addr1;
4032 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4033 + memory_region_section_addr(section, addr);
4034 /* RAM case */
4035 ptr = qemu_get_ram_ptr(addr1);
4036 switch (endian) {
4037 case DEVICE_LITTLE_ENDIAN:
4038 stl_le_p(ptr, val);
4039 break;
4040 case DEVICE_BIG_ENDIAN:
4041 stl_be_p(ptr, val);
4042 break;
4043 default:
4044 stl_p(ptr, val);
4045 break;
4046 }
4047 if (!cpu_physical_memory_is_dirty(addr1)) {
4048 /* invalidate code */
4049 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4050 /* set dirty bit */
4051 cpu_physical_memory_set_dirty_flags(addr1,
4052 (0xff & ~CODE_DIRTY_FLAG));
4053 }
4054 }
4055 }
4056
/* 32-bit physical stores in target-native, little and big endianness. */
void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
4071
/* XXX: optimize */
/* Store a single byte to guest physical memory (no endianness issue). */
void stb_phys(target_phys_addr_t addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}
4078
4079 /* warning: addr must be aligned */
4080 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4081 enum device_endian endian)
4082 {
4083 uint8_t *ptr;
4084 MemoryRegionSection *section;
4085
4086 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4087
4088 if (!memory_region_is_ram(section->mr) || section->readonly) {
4089 addr = memory_region_section_addr(section, addr);
4090 if (memory_region_is_ram(section->mr)) {
4091 section = &phys_sections[phys_section_rom];
4092 }
4093 #if defined(TARGET_WORDS_BIGENDIAN)
4094 if (endian == DEVICE_LITTLE_ENDIAN) {
4095 val = bswap16(val);
4096 }
4097 #else
4098 if (endian == DEVICE_BIG_ENDIAN) {
4099 val = bswap16(val);
4100 }
4101 #endif
4102 io_mem_write(section->mr, addr, val, 2);
4103 } else {
4104 unsigned long addr1;
4105 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4106 + memory_region_section_addr(section, addr);
4107 /* RAM case */
4108 ptr = qemu_get_ram_ptr(addr1);
4109 switch (endian) {
4110 case DEVICE_LITTLE_ENDIAN:
4111 stw_le_p(ptr, val);
4112 break;
4113 case DEVICE_BIG_ENDIAN:
4114 stw_be_p(ptr, val);
4115 break;
4116 default:
4117 stw_p(ptr, val);
4118 break;
4119 }
4120 if (!cpu_physical_memory_is_dirty(addr1)) {
4121 /* invalidate code */
4122 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4123 /* set dirty bit */
4124 cpu_physical_memory_set_dirty_flags(addr1,
4125 (0xff & ~CODE_DIRTY_FLAG));
4126 }
4127 }
4128 }
4129
/* 16-bit physical stores in target-native, little and big endianness. */
void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
4144
/* XXX: optimize */
/* 64-bit physical stores: byte-swap to the requested endianness in a
 * local, then write the 8 bytes through the generic rw path. */
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
4163
4164 /* virtual memory access for debug (includes writing to ROM) */
4165 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4166 uint8_t *buf, int len, int is_write)
4167 {
4168 int l;
4169 target_phys_addr_t phys_addr;
4170 target_ulong page;
4171
4172 while (len > 0) {
4173 page = addr & TARGET_PAGE_MASK;
4174 phys_addr = cpu_get_phys_page_debug(env, page);
4175 /* if no physical page mapped, return an error */
4176 if (phys_addr == -1)
4177 return -1;
4178 l = (page + TARGET_PAGE_SIZE) - addr;
4179 if (l > len)
4180 l = len;
4181 phys_addr += (addr & ~TARGET_PAGE_MASK);
4182 if (is_write)
4183 cpu_physical_memory_write_rom(phys_addr, buf, l);
4184 else
4185 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4186 len -= l;
4187 buf += l;
4188 addr += l;
4189 }
4190 return 0;
4191 }
4192 #endif
4193
/* in deterministic execution mode, instructions doing device I/Os
   must be at the end of the TB */
/* Called from an I/O access in icount mode: regenerate the current TB so
 * it ends exactly at the faulting I/O instruction, then restart
 * execution.  Does not return (exits via cpu_resume_from_signal). */
void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc(retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred. */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn. */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB. If this is not
       the first instruction in a TB then re-execute the preceding
       branch. */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
        && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen.  */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    /* CF_LAST_IO makes the regenerated TB stop right after the I/O insn. */
    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception. In practice
       we have already translated the block once so it's probably ok. */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB. */
    cpu_resume_from_signal(env, NULL);
}
4252
4253 #if !defined(CONFIG_USER_ONLY)
4254
/* Print translation-buffer statistics (TB counts, sizes, cross-page and
 * direct-jump ratios, flush counters) to @f via @cpu_fprintf, then let
 * TCG append its own counters.  Used by the "info jit" monitor command. */
void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    /* Single pass over all live TBs accumulating every statistic. */
    for(i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size       %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count            %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    /* All ratios guard against nb_tbs == 0 to avoid division by zero. */
    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
                cross_page,
                nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
}
4306
4307 /*
4308 * A helper function for the _utterly broken_ virtio device model to find out if
4309 * it's running on a big endian machine. Don't do this at home kids!
4310 */
4311 bool virtio_is_big_endian(void);
4312 bool virtio_is_big_endian(void)
4313 {
4314 #if defined(TARGET_WORDS_BIGENDIAN)
4315 return true;
4316 #else
4317 return false;
4318 #endif
4319 }
4320
4321 #endif