]> git.proxmox.com Git - qemu.git/blob - exec.c
qemu-option: opt_set(): use error_set()
[qemu.git] / exec.c
1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
59
60 #include "cputlb.h"
61
62 #define WANT_EXEC_OBSOLETE
63 #include "exec-obsolete.h"
64
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_UNASSIGNED
68
69 /* make various TB consistency checks */
70 //#define DEBUG_TB_CHECK
71
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
74
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
79
/* Value that PageDesc.code_write_count is compared against in
   tb_invalidate_phys_page_range() before a code bitmap is built
   for the page. */
#define SMC_BITMAP_USE_THRESHOLD 10

/* array of translation blocks, allocated in code_gen_alloc() */
static TranslationBlock *tbs;
/* number of elements allocated for 'tbs' (see code_gen_alloc()) */
static int code_gen_max_blocks;
/* hash table of TBs; tb_phys_invalidate() indexes it with
   tb_phys_hash_func() of the TB's physical PC */
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
/* number of entries of 'tbs' handed out by tb_alloc() so far */
static int nb_tbs;
/* any access to the tbs or the page table must use this lock */
spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88
/* 'code_gen_section' is the attribute list applied to code_gen_prologue
   below; it selects the alignment and, on ARM/Sparc64 hosts, the section. */
#if defined(__arm__) || defined(__sparc_v9__)
/* The prologue must be reachable with a direct jump. ARM and Sparc64
   have limited branch ranges (possibly also PPC) so place it in a
   section close to code segment. */
#define code_gen_section                                \
    __attribute__((__section__(".gen_code")))           \
    __attribute__((aligned (32)))
#elif defined(_WIN32) && !defined(_WIN64)
/* 32-bit Windows hosts: 16-byte alignment only. */
#define code_gen_section                                \
    __attribute__((aligned (16)))
#else
/* All other hosts: 32-byte alignment, default section. */
#define code_gen_section                                \
    __attribute__((aligned (32)))
#endif

/* buffer made executable by code_gen_alloc() and initialized through
   tcg_prologue_init() */
uint8_t code_gen_prologue[1024] code_gen_section;
/* start of the translated code buffer (set in code_gen_alloc()) */
static uint8_t *code_gen_buffer;
/* total size in bytes of code_gen_buffer */
static unsigned long code_gen_buffer_size;
/* threshold to flush the translated code buffer */
static unsigned long code_gen_buffer_max_size;
/* position in code_gen_buffer where the next TB's code is generated
   (advanced in tb_gen_code(), reset in tb_flush()) */
static uint8_t *code_gen_ptr;
110
/* Globals that only exist in system emulation (!CONFIG_USER_ONLY). */
#if !defined(CONFIG_USER_ONLY)
/* NOTE(review): not referenced in the visible part of this file;
   presumably a file descriptor backing guest RAM - confirm. */
int phys_ram_fd;
/* NOTE(review): not referenced in the visible part of this file;
   presumably non-zero while a migration is running - confirm. */
static int in_migration;

/* list of RAM blocks; starts out empty */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

/* NOTE(review): presumably the root regions created by memory_map_init()
   - confirm, the initialization is outside the visible chunk. */
static MemoryRegion *system_memory;
static MemoryRegion *system_io;

/* Special memory regions. memory_region_is_unassigned() treats
   io_mem_ram, io_mem_rom and io_mem_notdirty as "assigned". */
MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
static MemoryRegion io_mem_subpage_ram;

#endif
124
/* head of the singly linked list of CPUs, chained through 'next_cpu';
   cpu_exec_init() appends new CPUs at the tail */
CPUArchState *first_cpu;
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUArchState *,cpu_single_env);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting. */
int use_icount = 0;
133
/* Per-page bookkeeping stored at the bottom level of l1_map. */
typedef struct PageDesc {
    /* list of TBs intersecting this ram page; the low two bits of each
       link are used as a tag (see tb_page_remove()) */
    TranslationBlock *first_tb;
    /* in order to optimize self modifying code, we count the number
       of lookups we do to a given page to use a bitmap */
    unsigned int code_write_count;
    /* one bit per byte of the page, set where translated code lives;
       NULL until build_page_bitmap() allocates it */
    uint8_t *code_bitmap;
#if defined(CONFIG_USER_ONLY)
    /* NOTE(review): presumably the PAGE_* protection flags handled by
       page_set_flags()/page_get_flags() - confirm. */
    unsigned long flags;
#endif
} PageDesc;
145
/* In system mode we want L1_MAP to be based on ram offsets,
   while in user mode we want it to be based on virtual addresses. */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else
# define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
#endif

/* Size of the L2 (and L3, etc) page tables. */
#define L2_BITS 10
#define L2_SIZE (1 << L2_BITS)

/* Number of L2_BITS-wide levels needed to cover the physical page
   number, i.e. ceil((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS)
   / L2_BITS). */
#define P_L2_LEVELS \
    (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)

/* The bits remaining after N lower levels of page tables. */
#define V_L1_BITS_REM \
    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)

/* When fewer than 4 bits remain for the top level, the top level
   absorbs one more L2_BITS-wide level instead. */
#if V_L1_BITS_REM < 4
#define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
#else
#define V_L1_BITS V_L1_BITS_REM
#endif

/* number of entries in l1_map */
#define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)

/* right shift that turns a page index into its l1_map index */
#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
178
/* Host page geometry, filled in by page_init():
   - qemu_real_host_page_size: the page size reported by the host OS
   - qemu_host_page_size: the page size in use, never smaller than
     TARGET_PAGE_SIZE
   - qemu_host_page_mask: ~(qemu_host_page_size - 1) */
uintptr_t qemu_real_host_page_size;
uintptr_t qemu_host_page_size;
uintptr_t qemu_host_page_mask;

/* This is a multi-level map on the virtual address space.
   The bottom level has pointers to PageDesc. */
static void *l1_map[V_L1_SIZE];
186
#if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageEntry PhysPageEntry;

/* table of sections; leaf PhysPageEntry values index into it
   (see phys_page_find()) */
static MemoryRegionSection *phys_sections;
static unsigned phys_sections_nb, phys_sections_nb_alloc;
/* index in phys_sections of the section returned for unmapped pages */
static uint16_t phys_section_unassigned;
/* NOTE(review): presumably the phys_sections indices of the sections
   for io_mem_notdirty, io_mem_rom and io_mem_watch - confirm, they are
   assigned outside the visible chunk. */
static uint16_t phys_section_notdirty;
static uint16_t phys_section_rom;
static uint16_t phys_section_watch;

/* One slot of the physical page radix tree, packed into 16 bits. */
struct PhysPageEntry {
    uint16_t is_leaf : 1;
     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
    uint16_t ptr : 15;
};

/* Simple allocator for PhysPageEntry nodes */
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;

/* "no node" marker: 0x7fff, the largest value the 15-bit 'ptr' holds */
#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)

/* This is a multi-level map on the physical address space.
   The bottom level has pointers to MemoryRegionSections. */
static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };

/* both are called from cpu_exec_init_all() */
static void io_mem_init(void);
static void memory_map_init(void);

static MemoryRegion io_mem_watch;
#endif
218
/* log support */
/* Default log file name. The host test uses _WIN32, the macro used by
   every other Windows check in this file; plain WIN32 is not guaranteed
   to be predefined by the compiler. */
#ifdef _WIN32
static const char *logfilename = "qemu.log";
#else
static const char *logfilename = "/tmp/qemu.log";
#endif
FILE *logfile;
int loglevel;
static int log_append = 0;

/* statistics */
/* number of calls to tb_flush() */
static int tb_flush_count;
/* number of calls to tb_phys_invalidate() */
static int tb_phys_invalidate_count;
232
#ifdef _WIN32
/* Request read/write/execute access for the 'size' bytes at 'addr'.
   The result of VirtualProtect() is not checked. */
static void map_exec(void *addr, long size)
{
    DWORD previous_protection;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE,
                   &previous_protection);
}
#else
/* Request read/write/execute access for the 'size' bytes at 'addr'.
   The range is widened to whole host pages first. The result of
   mprotect() is not checked. */
static void map_exec(void *addr, long size)
{
    unsigned long psize = getpagesize();
    unsigned long first = (unsigned long)addr & ~(psize - 1);
    unsigned long last =
        ((unsigned long)addr + size + psize - 1) & ~(psize - 1);

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
258
259 static void page_init(void)
260 {
261 /* NOTE: we can always suppose that qemu_host_page_size >=
262 TARGET_PAGE_SIZE */
263 #ifdef _WIN32
264 {
265 SYSTEM_INFO system_info;
266
267 GetSystemInfo(&system_info);
268 qemu_real_host_page_size = system_info.dwPageSize;
269 }
270 #else
271 qemu_real_host_page_size = getpagesize();
272 #endif
273 if (qemu_host_page_size == 0)
274 qemu_host_page_size = qemu_real_host_page_size;
275 if (qemu_host_page_size < TARGET_PAGE_SIZE)
276 qemu_host_page_size = TARGET_PAGE_SIZE;
277 qemu_host_page_mask = ~(qemu_host_page_size - 1);
278
279 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
280 {
281 #ifdef HAVE_KINFO_GETVMMAP
282 struct kinfo_vmentry *freep;
283 int i, cnt;
284
285 freep = kinfo_getvmmap(getpid(), &cnt);
286 if (freep) {
287 mmap_lock();
288 for (i = 0; i < cnt; i++) {
289 unsigned long startaddr, endaddr;
290
291 startaddr = freep[i].kve_start;
292 endaddr = freep[i].kve_end;
293 if (h2g_valid(startaddr)) {
294 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
295
296 if (h2g_valid(endaddr)) {
297 endaddr = h2g(endaddr);
298 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
299 } else {
300 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
301 endaddr = ~0ul;
302 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
303 #endif
304 }
305 }
306 }
307 free(freep);
308 mmap_unlock();
309 }
310 #else
311 FILE *f;
312
313 last_brk = (unsigned long)sbrk(0);
314
315 f = fopen("/compat/linux/proc/self/maps", "r");
316 if (f) {
317 mmap_lock();
318
319 do {
320 unsigned long startaddr, endaddr;
321 int n;
322
323 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
324
325 if (n == 2 && h2g_valid(startaddr)) {
326 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
327
328 if (h2g_valid(endaddr)) {
329 endaddr = h2g(endaddr);
330 } else {
331 endaddr = ~0ul;
332 }
333 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
334 }
335 } while (!feof(f));
336
337 fclose(f);
338 mmap_unlock();
339 }
340 #endif
341 }
342 #endif
343 }
344
345 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
346 {
347 PageDesc *pd;
348 void **lp;
349 int i;
350
351 #if defined(CONFIG_USER_ONLY)
352 /* We can't use g_malloc because it may recurse into a locked mutex. */
353 # define ALLOC(P, SIZE) \
354 do { \
355 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
356 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
357 } while (0)
358 #else
359 # define ALLOC(P, SIZE) \
360 do { P = g_malloc0(SIZE); } while (0)
361 #endif
362
363 /* Level 1. Always allocated. */
364 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
365
366 /* Level 2..N-1. */
367 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
368 void **p = *lp;
369
370 if (p == NULL) {
371 if (!alloc) {
372 return NULL;
373 }
374 ALLOC(p, sizeof(void *) * L2_SIZE);
375 *lp = p;
376 }
377
378 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
379 }
380
381 pd = *lp;
382 if (pd == NULL) {
383 if (!alloc) {
384 return NULL;
385 }
386 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
387 *lp = pd;
388 }
389
390 #undef ALLOC
391
392 return pd + (index & (L2_SIZE - 1));
393 }
394
395 static inline PageDesc *page_find(tb_page_addr_t index)
396 {
397 return page_find_alloc(index, 0);
398 }
399
400 #if !defined(CONFIG_USER_ONLY)
401
402 static void phys_map_node_reserve(unsigned nodes)
403 {
404 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
405 typedef PhysPageEntry Node[L2_SIZE];
406 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
407 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
408 phys_map_nodes_nb + nodes);
409 phys_map_nodes = g_renew(Node, phys_map_nodes,
410 phys_map_nodes_nb_alloc);
411 }
412 }
413
414 static uint16_t phys_map_node_alloc(void)
415 {
416 unsigned i;
417 uint16_t ret;
418
419 ret = phys_map_nodes_nb++;
420 assert(ret != PHYS_MAP_NODE_NIL);
421 assert(ret != phys_map_nodes_nb_alloc);
422 for (i = 0; i < L2_SIZE; ++i) {
423 phys_map_nodes[ret][i].is_leaf = 0;
424 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
425 }
426 return ret;
427 }
428
/* Mark every node of phys_map_nodes as free again. Only the usage
   counter is reset; the array and its capacity are left untouched. */
static void phys_map_nodes_reset(void)
{
    phys_map_nodes_nb = 0;
}
433
434
435 static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
436 target_phys_addr_t *nb, uint16_t leaf,
437 int level)
438 {
439 PhysPageEntry *p;
440 int i;
441 target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
442
443 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
444 lp->ptr = phys_map_node_alloc();
445 p = phys_map_nodes[lp->ptr];
446 if (level == 0) {
447 for (i = 0; i < L2_SIZE; i++) {
448 p[i].is_leaf = 1;
449 p[i].ptr = phys_section_unassigned;
450 }
451 }
452 } else {
453 p = phys_map_nodes[lp->ptr];
454 }
455 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
456
457 while (*nb && lp < &p[L2_SIZE]) {
458 if ((*index & (step - 1)) == 0 && *nb >= step) {
459 lp->is_leaf = true;
460 lp->ptr = leaf;
461 *index += step;
462 *nb -= step;
463 } else {
464 phys_page_set_level(lp, index, nb, leaf, level - 1);
465 }
466 ++lp;
467 }
468 }
469
470 static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
471 uint16_t leaf)
472 {
473 /* Wildly overreserve - it doesn't matter much. */
474 phys_map_node_reserve(3 * P_L2_LEVELS);
475
476 phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
477 }
478
479 MemoryRegionSection *phys_page_find(target_phys_addr_t index)
480 {
481 PhysPageEntry lp = phys_map;
482 PhysPageEntry *p;
483 int i;
484 uint16_t s_index = phys_section_unassigned;
485
486 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
487 if (lp.ptr == PHYS_MAP_NODE_NIL) {
488 goto not_found;
489 }
490 p = phys_map_nodes[lp.ptr];
491 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
492 }
493
494 s_index = lp.ptr;
495 not_found:
496 return &phys_sections[s_index];
497 }
498
499 bool memory_region_is_unassigned(MemoryRegion *mr)
500 {
501 return mr != &io_mem_ram && mr != &io_mem_rom
502 && mr != &io_mem_notdirty && !mr->rom_device
503 && mr != &io_mem_watch;
504 }
505
/* These definitions sit inside the !CONFIG_USER_ONLY section: in system
   mode both macros expand to an empty statement. */
#define mmap_lock() do { } while(0)
#define mmap_unlock() do { } while(0)
508 #endif
509
/* 32 MiB: size of the static buffer below, and the size used by
   code_gen_alloc() for that buffer. */
#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)

#if defined(CONFIG_USER_ONLY)
/* Currently it is not recommended to allocate big chunks of data in
   user mode. It will change when a dedicated libc will be used */
#define USE_STATIC_CODE_GEN_BUFFER
#endif

#ifdef USE_STATIC_CODE_GEN_BUFFER
/* statically allocated translation buffer; code_gen_alloc() makes it
   executable with map_exec() */
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
               __attribute__((aligned (CODE_GEN_ALIGN)));
#endif
522
/*
 * Set up the translated code buffer and the TranslationBlock array.
 *
 * 'tb_size' is the requested buffer size in bytes; it is ignored when
 * the static buffer is used, and 0 selects a default. On return
 * code_gen_buffer, code_gen_buffer_size, code_gen_buffer_max_size,
 * code_gen_max_blocks and tbs are initialized. A failed mmap() prints
 * a message and exits the process.
 */
static void code_gen_alloc(unsigned long tb_size)
{
#ifdef USE_STATIC_CODE_GEN_BUFFER
    code_gen_buffer = static_code_gen_buffer;
    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
    map_exec(code_gen_buffer, code_gen_buffer_size);
#else
    code_gen_buffer_size = tb_size;
    if (code_gen_buffer_size == 0) {
        /* USE_STATIC_CODE_GEN_BUFFER is defined whenever CONFIG_USER_ONLY
           is, so only the #else arm below is compiled from this path. */
#if defined(CONFIG_USER_ONLY)
        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
#else
        /* XXX: needs adjustments */
        code_gen_buffer_size = (unsigned long)(ram_size / 4);
#endif
    }
    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
    /* The code gen buffer location may have constraints depending on
       the host cpu and OS */
#if defined(__linux__)
    {
        int flags;
        void *start = NULL;

        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        flags |= MAP_32BIT;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        start = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024))
            code_gen_buffer_size = (512 * 1024 * 1024);
#elif defined(__arm__)
        /* Keep the buffer no bigger than 16MB to branch between blocks */
        if (code_gen_buffer_size > 16 * 1024 * 1024)
            code_gen_buffer_size = 16 * 1024 * 1024;
#elif defined(__s390x__)
        /* Map the buffer so that we can use direct calls and branches.  */
        /* We have a +- 4GB range on the branches; leave some slop.  */
        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
        }
        /* MAP_FIXED is not set in this arm, so the address is only
           passed to mmap() as a placement hint. */
        start = (void *)0x90000000UL;
#endif
        code_gen_buffer = mmap(start, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
    || defined(__DragonFly__) || defined(__OpenBSD__) \
    || defined(__NetBSD__)
    {
        int flags;
        void *addr = NULL;
        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
         * 0x40000000 is free */
        flags |= MAP_FIXED;
        addr = (void *)0x40000000;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        addr = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
            code_gen_buffer_size = (512 * 1024 * 1024);
        }
#endif
        code_gen_buffer = mmap(addr, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#else
    /* Other hosts: heap allocation, then made executable. */
    code_gen_buffer = g_malloc(code_gen_buffer_size);
    map_exec(code_gen_buffer, code_gen_buffer_size);
#endif
#endif /* !USE_STATIC_CODE_GEN_BUFFER */
    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
    /* Leave TCG_MAX_OP_SIZE * OPC_BUF_SIZE bytes of head room below the
       end of the buffer; tb_alloc() refuses new TBs past this mark. */
    code_gen_buffer_max_size = code_gen_buffer_size -
        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
    /* Size the TB array from the expected average block size. */
    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
}
622
/* Must be called before using the QEMU cpus. 'tb_size' is the size
   (in bytes) allocated to the translation buffer. Zero means default
   size. */
void tcg_exec_init(unsigned long tb_size)
{
    cpu_gen_init();
    /* allocate the code buffer and the TB array */
    code_gen_alloc(tb_size);
    /* code generation starts at the beginning of the buffer */
    code_gen_ptr = code_gen_buffer;
    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
    /* set up the host page size globals */
    page_init();
#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
    /* There's no guest base to take into account, so go ahead and
       initialize the prologue now. */
    tcg_prologue_init(&tcg_ctx);
#endif
}
639
640 bool tcg_enabled(void)
641 {
642 return code_gen_buffer != NULL;
643 }
644
/* Global initialization entry point. In system mode it runs
   memory_map_init() followed by io_mem_init(); in user mode it does
   nothing. */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
652
653 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
654
655 static int cpu_common_post_load(void *opaque, int version_id)
656 {
657 CPUArchState *env = opaque;
658
659 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
660 version_id is increased. */
661 env->interrupt_request &= ~0x01;
662 tlb_flush(env, 1);
663
664 return 0;
665 }
666
/* Save/restore description of the CPU state handled here: the 'halted'
   and 'interrupt_request' fields of CPUArchState, both as 32-bit
   unsigned values. Registered per CPU in cpu_exec_init(). */
static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    /* runs after the fields below have been loaded */
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUArchState),
        VMSTATE_UINT32(interrupt_request, CPUArchState),
        VMSTATE_END_OF_LIST()
    }
};
679 #endif
680
681 CPUArchState *qemu_get_cpu(int cpu)
682 {
683 CPUArchState *env = first_cpu;
684
685 while (env) {
686 if (env->cpu_index == cpu)
687 break;
688 env = env->next_cpu;
689 }
690
691 return env;
692 }
693
694 void cpu_exec_init(CPUArchState *env)
695 {
696 CPUArchState **penv;
697 int cpu_index;
698
699 #if defined(CONFIG_USER_ONLY)
700 cpu_list_lock();
701 #endif
702 env->next_cpu = NULL;
703 penv = &first_cpu;
704 cpu_index = 0;
705 while (*penv != NULL) {
706 penv = &(*penv)->next_cpu;
707 cpu_index++;
708 }
709 env->cpu_index = cpu_index;
710 env->numa_node = 0;
711 QTAILQ_INIT(&env->breakpoints);
712 QTAILQ_INIT(&env->watchpoints);
713 #ifndef CONFIG_USER_ONLY
714 env->thread_id = qemu_get_thread_id();
715 #endif
716 *penv = env;
717 #if defined(CONFIG_USER_ONLY)
718 cpu_list_unlock();
719 #endif
720 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
721 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
722 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
723 cpu_save, cpu_load, env);
724 #endif
725 }
726
727 /* Allocate a new translation block. Flush the translation buffer if
728 too many translation blocks or too much generated code. */
729 static TranslationBlock *tb_alloc(target_ulong pc)
730 {
731 TranslationBlock *tb;
732
733 if (nb_tbs >= code_gen_max_blocks ||
734 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
735 return NULL;
736 tb = &tbs[nb_tbs++];
737 tb->pc = pc;
738 tb->cflags = 0;
739 return tb;
740 }
741
742 void tb_free(TranslationBlock *tb)
743 {
744 /* In practice this is mostly used for single use temporary TB
745 Ignore the hard cases and just back up if this TB happens to
746 be the last one generated. */
747 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
748 code_gen_ptr = tb->tc_ptr;
749 nb_tbs--;
750 }
751 }
752
753 static inline void invalidate_page_bitmap(PageDesc *p)
754 {
755 if (p->code_bitmap) {
756 g_free(p->code_bitmap);
757 p->code_bitmap = NULL;
758 }
759 p->code_write_count = 0;
760 }
761
762 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
763
764 static void page_flush_tb_1 (int level, void **lp)
765 {
766 int i;
767
768 if (*lp == NULL) {
769 return;
770 }
771 if (level == 0) {
772 PageDesc *pd = *lp;
773 for (i = 0; i < L2_SIZE; ++i) {
774 pd[i].first_tb = NULL;
775 invalidate_page_bitmap(pd + i);
776 }
777 } else {
778 void **pp = *lp;
779 for (i = 0; i < L2_SIZE; ++i) {
780 page_flush_tb_1 (level - 1, pp + i);
781 }
782 }
783 }
784
785 static void page_flush_tb(void)
786 {
787 int i;
788 for (i = 0; i < V_L1_SIZE; i++) {
789 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
790 }
791 }
792
793 /* flush all the translation blocks */
794 /* XXX: tb_flush is currently not thread safe */
795 void tb_flush(CPUArchState *env1)
796 {
797 CPUArchState *env;
798 #if defined(DEBUG_FLUSH)
799 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
800 (unsigned long)(code_gen_ptr - code_gen_buffer),
801 nb_tbs, nb_tbs > 0 ?
802 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
803 #endif
804 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
805 cpu_abort(env1, "Internal error: code buffer overflow\n");
806
807 nb_tbs = 0;
808
809 for(env = first_cpu; env != NULL; env = env->next_cpu) {
810 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
811 }
812
813 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
814 page_flush_tb();
815
816 code_gen_ptr = code_gen_buffer;
817 /* XXX: flush processor icache at this point if cache flush is
818 expensive */
819 tb_flush_count++;
820 }
821
822 #ifdef DEBUG_TB_CHECK
823
824 static void tb_invalidate_check(target_ulong address)
825 {
826 TranslationBlock *tb;
827 int i;
828 address &= TARGET_PAGE_MASK;
829 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
830 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
831 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
832 address >= tb->pc + tb->size)) {
833 printf("ERROR invalidate: address=" TARGET_FMT_lx
834 " PC=%08lx size=%04x\n",
835 address, (long)tb->pc, tb->size);
836 }
837 }
838 }
839 }
840
841 /* verify that all the pages have correct rights for code */
842 static void tb_page_check(void)
843 {
844 TranslationBlock *tb;
845 int i, flags1, flags2;
846
847 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
848 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
849 flags1 = page_get_flags(tb->pc);
850 flags2 = page_get_flags(tb->pc + tb->size - 1);
851 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
852 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
853 (long)tb->pc, tb->size, flags1, flags2);
854 }
855 }
856 }
857 }
858
859 #endif
860
861 /* invalidate one TB */
862 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
863 int next_offset)
864 {
865 TranslationBlock *tb1;
866 for(;;) {
867 tb1 = *ptb;
868 if (tb1 == tb) {
869 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
870 break;
871 }
872 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
873 }
874 }
875
876 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
877 {
878 TranslationBlock *tb1;
879 unsigned int n1;
880
881 for(;;) {
882 tb1 = *ptb;
883 n1 = (uintptr_t)tb1 & 3;
884 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
885 if (tb1 == tb) {
886 *ptb = tb1->page_next[n1];
887 break;
888 }
889 ptb = &tb1->page_next[n1];
890 }
891 }
892
893 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
894 {
895 TranslationBlock *tb1, **ptb;
896 unsigned int n1;
897
898 ptb = &tb->jmp_next[n];
899 tb1 = *ptb;
900 if (tb1) {
901 /* find tb(n) in circular list */
902 for(;;) {
903 tb1 = *ptb;
904 n1 = (uintptr_t)tb1 & 3;
905 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
906 if (n1 == n && tb1 == tb)
907 break;
908 if (n1 == 2) {
909 ptb = &tb1->jmp_first;
910 } else {
911 ptb = &tb1->jmp_next[n1];
912 }
913 }
914 /* now we can suppress tb(n) from the list */
915 *ptb = tb->jmp_next[n];
916
917 tb->jmp_next[n] = NULL;
918 }
919 }
920
921 /* reset the jump entry 'n' of a TB so that it is not chained to
922 another TB */
923 static inline void tb_reset_jump(TranslationBlock *tb, int n)
924 {
925 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
926 }
927
/*
 * Remove 'tb' from every lookup structure so that it is no longer
 * found or jumped to: the physical hash table, the TB lists of the
 * page(s) it lies on, every CPU's jump cache and the jump chains.
 *
 * 'page_addr' names a page whose TB list must be left alone; pass -1
 * to unlink the TB from all of its pages.
 */
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
    CPUArchState *env;
    PageDesc *p;
    unsigned int h, n1;
    tb_page_addr_t phys_pc;
    TranslationBlock *tb1, *tb2;

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
    h = tb_phys_hash_func(phys_pc);
    tb_remove(&tb_phys_hash[h], tb,
              offsetof(TranslationBlock, phys_hash_next));

    /* remove the TB from the page list */
    if (tb->page_addr[0] != page_addr) {
        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }
    /* page_addr[1] is -1 when the TB does not reach a second page */
    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }

    tb_invalidated_flag = 1;

    /* remove the TB from the per-CPU jump caches */
    h = tb_jmp_cache_hash_func(tb->pc);
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        if (env->tb_jmp_cache[h] == tb)
            env->tb_jmp_cache[h] = NULL;
    }

    /* suppress this TB from the two jump lists */
    tb_jmp_remove(tb, 0);
    tb_jmp_remove(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb1 = tb->jmp_first;
    for(;;) {
        /* the low two bits of a link are a tag; tag 2 ends the walk */
        n1 = (uintptr_t)tb1 & 3;
        if (n1 == 2)
            break;
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
        /* fetch the next link before this one is cleared */
        tb2 = tb1->jmp_next[n1];
        tb_reset_jump(tb1, n1);
        tb1->jmp_next[n1] = NULL;
        tb1 = tb2;
    }
    /* leave the list empty: jmp_first points back at tb, tagged 2 */
    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */

    tb_phys_invalidate_count++;
}
983
/* Set bits [start, start + len) of the bitmap 'tab'. Bit i lives in
   byte i / 8 at position i % 8, least significant bit first. Nothing
   is written when 'len' is 0. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    const int end = start + len;
    const int head_mask = 0xff << (start & 7);
    uint8_t *byte = tab + (start >> 3);
    int pos;

    if ((start & ~7) == (end & ~7)) {
        /* the whole range lies within a single byte */
        if (start >= end) {
            return;
        }
        *byte |= head_mask & ~(0xff << (end & 7));
        return;
    }

    /* first, partially covered byte */
    *byte++ |= head_mask;

    /* fully covered bytes in the middle */
    for (pos = (start + 8) & ~7; pos < (end & ~7); pos += 8) {
        *byte++ = 0xff;
    }

    /* last, partially covered byte */
    if (pos < end) {
        *byte |= ~(0xff << (end & 7));
    }
}
1010
1011 static void build_page_bitmap(PageDesc *p)
1012 {
1013 int n, tb_start, tb_end;
1014 TranslationBlock *tb;
1015
1016 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1017
1018 tb = p->first_tb;
1019 while (tb != NULL) {
1020 n = (uintptr_t)tb & 3;
1021 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1022 /* NOTE: this is subtle as a TB may span two physical pages */
1023 if (n == 0) {
1024 /* NOTE: tb_end may be after the end of the page, but
1025 it is not a problem */
1026 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1027 tb_end = tb_start + tb->size;
1028 if (tb_end > TARGET_PAGE_SIZE)
1029 tb_end = TARGET_PAGE_SIZE;
1030 } else {
1031 tb_start = 0;
1032 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1033 }
1034 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1035 tb = tb->page_next[n];
1036 }
1037 }
1038
1039 TranslationBlock *tb_gen_code(CPUArchState *env,
1040 target_ulong pc, target_ulong cs_base,
1041 int flags, int cflags)
1042 {
1043 TranslationBlock *tb;
1044 uint8_t *tc_ptr;
1045 tb_page_addr_t phys_pc, phys_page2;
1046 target_ulong virt_page2;
1047 int code_gen_size;
1048
1049 phys_pc = get_page_addr_code(env, pc);
1050 tb = tb_alloc(pc);
1051 if (!tb) {
1052 /* flush must be done */
1053 tb_flush(env);
1054 /* cannot fail at this point */
1055 tb = tb_alloc(pc);
1056 /* Don't forget to invalidate previous TB info. */
1057 tb_invalidated_flag = 1;
1058 }
1059 tc_ptr = code_gen_ptr;
1060 tb->tc_ptr = tc_ptr;
1061 tb->cs_base = cs_base;
1062 tb->flags = flags;
1063 tb->cflags = cflags;
1064 cpu_gen_code(env, tb, &code_gen_size);
1065 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1066 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1067
1068 /* check next page if needed */
1069 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1070 phys_page2 = -1;
1071 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1072 phys_page2 = get_page_addr_code(env, virt_page2);
1073 }
1074 tb_link_page(tb, phys_pc, phys_page2);
1075 return tb;
1076 }
1077
1078 /*
1079 * invalidate all TBs which intersect with the target physical pages
1080 * starting in range [start;end[. NOTE: start and end may refer to
1081 * different physical pages. 'is_cpu_write_access' should be true if called
1082 * from a real cpu write access: the virtual CPU will exit the current
1083 * TB if code is modified inside this TB.
1084 */
1085 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1086 int is_cpu_write_access)
1087 {
1088 while (start < end) {
1089 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1090 start &= TARGET_PAGE_MASK;
1091 start += TARGET_PAGE_SIZE;
1092 }
1093 }
1094
/* invalidate all TBs which intersect with the target physical page
   starting in range [start;end[. NOTE: start and end must refer to
   the same physical page. 'is_cpu_write_access' should be true if called
   from a real cpu write access: the virtual CPU will exit the current
   TB if code is modified inside this TB.

   The page descriptor is looked up from 'start' only.  When
   TARGET_HAS_PRECISE_SMC is defined and the TB being executed is among
   the invalidated ones, this function does not return normally: it
   regenerates a single-instruction TB and calls cpu_resume_from_signal(). */
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                                   int is_cpu_write_access)
{
    TranslationBlock *tb, *tb_next, *saved_tb;
    CPUArchState *env = cpu_single_env;
    tb_page_addr_t tb_start, tb_end;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    /* the current TB is only searched for on a real cpu write access */
    int current_tb_not_found = is_cpu_write_access;
    TranslationBlock *current_tb = NULL;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif /* TARGET_HAS_PRECISE_SMC */

    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    /* the write counter is bumped on every call that finds no bitmap;
       the bitmap itself is only built for cpu write accesses */
    if (!p->code_bitmap &&
        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
        is_cpu_write_access) {
        /* build code bitmap */
        build_page_bitmap(p);
    }

    /* we remove all the TBs in the range [start, end[ */
    /* XXX: see if in some cases it could be faster to invalidate all the code */
    tb = p->first_tb;
    while (tb != NULL) {
        /* the low two bits of the list pointer select which of the TB's
           page_next[]/page_addr[] slots belongs to this page */
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
        /* fetch the successor before the TB is possibly invalidated */
        tb_next = tb->page_next[n];
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
            tb_end = tb_start + tb->size;
        } else {
            /* second page: the TB occupies it from its first byte */
            tb_start = tb->page_addr[1];
            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        /* half-open interval overlap test */
        if (!(tb_end <= start || tb_start >= end)) {
#ifdef TARGET_HAS_PRECISE_SMC
            if (current_tb_not_found) {
                /* look the current TB up at most once per call */
                current_tb_not_found = 0;
                current_tb = NULL;
                if (env->mem_io_pc) {
                    /* now we have a real cpu fault */
                    current_tb = tb_find_pc(env->mem_io_pc);
                }
            }
            if (current_tb == tb &&
                (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                its execution. We could be more precise by checking
                that the modification is after the current PC, but it
                would require a specialized function to partially
                restore the CPU state */

                current_tb_modified = 1;
                cpu_restore_state(current_tb, env, env->mem_io_pc);
                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                     &current_flags);
            }
#endif /* TARGET_HAS_PRECISE_SMC */
            /* we need to do that to handle the case where a signal
               occurs while doing tb_phys_invalidate() */
            saved_tb = NULL;
            if (env) {
                saved_tb = env->current_tb;
                env->current_tb = NULL;
            }
            tb_phys_invalidate(tb, -1);
            if (env) {
                env->current_tb = saved_tb;
                /* re-raise interrupts that were pending while current_tb
                   was temporarily cleared */
                if (env->interrupt_request && env->current_tb)
                    cpu_interrupt(env, env->interrupt_request);
            }
        }
        tb = tb_next;
    }
#if !defined(CONFIG_USER_ONLY)
    /* if no code remaining, no need to continue to use slow writes */
    if (!p->first_tb) {
        invalidate_page_bitmap(p);
        if (is_cpu_write_access) {
            /* NOTE(review): env is dereferenced here without the NULL
               check used above - presumably a cpu write access implies
               cpu_single_env is set; confirm against callers */
            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
        }
    }
#endif
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, NULL);
    }
#endif
}
1204
1205 /* len must be <= 8 and start must be a multiple of len */
1206 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1207 {
1208 PageDesc *p;
1209 int offset, b;
1210 #if 0
1211 if (1) {
1212 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1213 cpu_single_env->mem_io_vaddr, len,
1214 cpu_single_env->eip,
1215 cpu_single_env->eip +
1216 (intptr_t)cpu_single_env->segs[R_CS].base);
1217 }
1218 #endif
1219 p = page_find(start >> TARGET_PAGE_BITS);
1220 if (!p)
1221 return;
1222 if (p->code_bitmap) {
1223 offset = start & ~TARGET_PAGE_MASK;
1224 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1225 if (b & ((1 << len) - 1))
1226 goto do_invalidate;
1227 } else {
1228 do_invalidate:
1229 tb_invalidate_phys_page_range(start, start + len, 1);
1230 }
1231 }
1232
1233 #if !defined(CONFIG_SOFTMMU)
/* Invalidate every TB attached to the target page containing 'addr'.
   'pc' is a host code address used to find the TB being executed
   (0 means none), and 'puc' is passed through to
   cpu_resume_from_signal().  With TARGET_HAS_PRECISE_SMC, if the executing
   TB is among the invalidated ones this function does not return: a
   single-instruction TB is generated and execution is resumed. */
static void tb_invalidate_phys_page(tb_page_addr_t addr,
                                    uintptr_t pc, void *puc)
{
    TranslationBlock *tb;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    TranslationBlock *current_tb = NULL;
    CPUArchState *env = cpu_single_env;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif

    addr &= TARGET_PAGE_MASK;
    p = page_find(addr >> TARGET_PAGE_BITS);
    if (!p)
        return;
    tb = p->first_tb;
#ifdef TARGET_HAS_PRECISE_SMC
    if (tb && pc != 0) {
        current_tb = tb_find_pc(pc);
    }
#endif
    while (tb != NULL) {
        /* low two bits of the list pointer: index of this page within
           the TB's page_next[] array */
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
#ifdef TARGET_HAS_PRECISE_SMC
        if (current_tb == tb &&
            (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                   its execution. We could be more precise by checking
                   that the modification is after the current PC, but it
                   would require a specialized function to partially
                   restore the CPU state */

            current_tb_modified = 1;
            cpu_restore_state(current_tb, env, pc);
            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                 &current_flags);
        }
#endif /* TARGET_HAS_PRECISE_SMC */
        /* NOTE(review): page_next[n] is read after tb_phys_invalidate();
           this presumably relies on that call leaving the list of the
           page passed as 'addr' untouched - confirm */
        tb_phys_invalidate(tb, addr);
        tb = tb->page_next[n];
    }
    /* the whole list has been walked: drop it in one go */
    p->first_tb = NULL;
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, puc);
    }
#endif
}
1292 #endif
1293
/* add the tb in the target page and protect it if necessary.
   'n' (0 or 1) selects which of the TB's page slots is filled and
   'page_addr' is the address of that page.  The TB is pushed at the head
   of the page's TB list with 'n' stored in the low bits of the pointer. */
static inline void tb_alloc_page(TranslationBlock *tb,
                                 unsigned int n, tb_page_addr_t page_addr)
{
    PageDesc *p;
#ifndef CONFIG_USER_ONLY
    bool page_already_protected;
#endif

    tb->page_addr[n] = page_addr;
    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
    tb->page_next[n] = p->first_tb;
#ifndef CONFIG_USER_ONLY
    /* must be sampled before first_tb is overwritten below */
    page_already_protected = p->first_tb != NULL;
#endif
    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
    /* the set of TBs on the page changed, so any code bitmap is stale */
    invalidate_page_bitmap(p);

#if defined(TARGET_HAS_SMC) || 1

#if defined(CONFIG_USER_ONLY)
    if (p->flags & PAGE_WRITE) {
        target_ulong addr;
        PageDesc *p2;
        int prot;

        /* force the host page as non writable (writes will have a
           page fault + mprotect overhead) */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        /* visit every target page sharing this host page: accumulate
           their flags and clear PAGE_WRITE on each */
        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
            addr += TARGET_PAGE_SIZE) {

            p2 = page_find (addr >> TARGET_PAGE_BITS);
            if (!p2)
                continue;
            prot |= p2->flags;
            p2->flags &= ~PAGE_WRITE;
          }
        mprotect(g2h(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
#ifdef DEBUG_TB_INVALIDATE
        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
               page_addr);
#endif
    }
#else
    /* if some code is already present, then the pages are already
       protected. So we handle the case where only the first TB is
       allocated in a physical page */
    if (!page_already_protected) {
        tlb_protect_code(page_addr);
    }
#endif

#endif /* TARGET_HAS_SMC */
}
1351
1352 /* add a new TB and link it to the physical page tables. phys_page2 is
1353 (-1) to indicate that only one page contains the TB. */
1354 void tb_link_page(TranslationBlock *tb,
1355 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1356 {
1357 unsigned int h;
1358 TranslationBlock **ptb;
1359
1360 /* Grab the mmap lock to stop another thread invalidating this TB
1361 before we are done. */
1362 mmap_lock();
1363 /* add in the physical hash table */
1364 h = tb_phys_hash_func(phys_pc);
1365 ptb = &tb_phys_hash[h];
1366 tb->phys_hash_next = *ptb;
1367 *ptb = tb;
1368
1369 /* add in the page list */
1370 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1371 if (phys_page2 != -1)
1372 tb_alloc_page(tb, 1, phys_page2);
1373 else
1374 tb->page_addr[1] = -1;
1375
1376 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1377 tb->jmp_next[0] = NULL;
1378 tb->jmp_next[1] = NULL;
1379
1380 /* init original jump addresses */
1381 if (tb->tb_next_offset[0] != 0xffff)
1382 tb_reset_jump(tb, 0);
1383 if (tb->tb_next_offset[1] != 0xffff)
1384 tb_reset_jump(tb, 1);
1385
1386 #ifdef DEBUG_TB_CHECK
1387 tb_page_check();
1388 #endif
1389 mmap_unlock();
1390 }
1391
1392 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1393 tb[1].tc_ptr. Return NULL if not found */
1394 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1395 {
1396 int m_min, m_max, m;
1397 uintptr_t v;
1398 TranslationBlock *tb;
1399
1400 if (nb_tbs <= 0)
1401 return NULL;
1402 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1403 tc_ptr >= (uintptr_t)code_gen_ptr) {
1404 return NULL;
1405 }
1406 /* binary search (cf Knuth) */
1407 m_min = 0;
1408 m_max = nb_tbs - 1;
1409 while (m_min <= m_max) {
1410 m = (m_min + m_max) >> 1;
1411 tb = &tbs[m];
1412 v = (uintptr_t)tb->tc_ptr;
1413 if (v == tc_ptr)
1414 return tb;
1415 else if (tc_ptr < v) {
1416 m_max = m - 1;
1417 } else {
1418 m_min = m + 1;
1419 }
1420 }
1421 return &tbs[m_max];
1422 }
1423
static void tb_reset_jump_recursive(TranslationBlock *tb);

/* Undo the direct jump number 'n' (0 or 1) of 'tb', if one is set up:
   remove 'tb' from the destination TB's list of incoming jumps, reset
   the jump in the generated code, then recursively do the same for the
   jumps of the destination TB. */
static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, *tb_next, **ptb;
    unsigned int n1;

    tb1 = tb->jmp_next[n];
    if (tb1 != NULL) {
        /* find head of list */
        for(;;) {
            /* low two bits are a tag: 0/1 = jump slot of the linked TB,
               2 = the TB owning the list */
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == 2)
                break;
            tb1 = tb1->jmp_next[n1];
        }
        /* we are now sure now that tb jumps to tb1 */
        tb_next = tb1;

        /* remove tb from the jmp_first list */
        ptb = &tb_next->jmp_first;
        for(;;) {
            tb1 = *ptb;
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            ptb = &tb1->jmp_next[n1];
        }
        /* unlink: the predecessor now points at tb's successor */
        *ptb = tb->jmp_next[n];
        tb->jmp_next[n] = NULL;

        /* suppress the jump to next tb in generated code */
        tb_reset_jump(tb, n);

        /* suppress jumps in the tb on which we could have jumped */
        tb_reset_jump_recursive(tb_next);
    }
}
1464
1465 static void tb_reset_jump_recursive(TranslationBlock *tb)
1466 {
1467 tb_reset_jump_recursive2(tb, 0);
1468 tb_reset_jump_recursive2(tb, 1);
1469 }
1470
1471 #if defined(TARGET_HAS_ICE)
1472 #if defined(CONFIG_USER_ONLY)
1473 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1474 {
1475 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1476 }
1477 #else
1478 void tb_invalidate_phys_addr(target_phys_addr_t addr)
1479 {
1480 ram_addr_t ram_addr;
1481 MemoryRegionSection *section;
1482
1483 section = phys_page_find(addr >> TARGET_PAGE_BITS);
1484 if (!(memory_region_is_ram(section->mr)
1485 || (section->mr->rom_device && section->mr->readable))) {
1486 return;
1487 }
1488 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1489 + memory_region_section_addr(section, addr);
1490 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1491 }
1492
1493 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1494 {
1495 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc));
1496 }
1497 #endif
1498 #endif /* TARGET_HAS_ICE */
1499
1500 #if defined(CONFIG_USER_ONLY)
/* CONFIG_USER_ONLY stub: does nothing. */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)

{
}
1505
/* CONFIG_USER_ONLY stub: always fails with -ENOSYS and leaves
   '*watchpoint' untouched. */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
1511 #else
1512 /* Add a watchpoint. */
1513 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1514 int flags, CPUWatchpoint **watchpoint)
1515 {
1516 target_ulong len_mask = ~(len - 1);
1517 CPUWatchpoint *wp;
1518
1519 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1520 if ((len & (len - 1)) || (addr & ~len_mask) ||
1521 len == 0 || len > TARGET_PAGE_SIZE) {
1522 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1523 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1524 return -EINVAL;
1525 }
1526 wp = g_malloc(sizeof(*wp));
1527
1528 wp->vaddr = addr;
1529 wp->len_mask = len_mask;
1530 wp->flags = flags;
1531
1532 /* keep all GDB-injected watchpoints in front */
1533 if (flags & BP_GDB)
1534 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1535 else
1536 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1537
1538 tlb_flush_page(env, addr);
1539
1540 if (watchpoint)
1541 *watchpoint = wp;
1542 return 0;
1543 }
1544
1545 /* Remove a specific watchpoint. */
1546 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1547 int flags)
1548 {
1549 target_ulong len_mask = ~(len - 1);
1550 CPUWatchpoint *wp;
1551
1552 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1553 if (addr == wp->vaddr && len_mask == wp->len_mask
1554 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1555 cpu_watchpoint_remove_by_ref(env, wp);
1556 return 0;
1557 }
1558 }
1559 return -ENOENT;
1560 }
1561
1562 /* Remove a specific watchpoint by reference. */
1563 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1564 {
1565 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1566
1567 tlb_flush_page(env, watchpoint->vaddr);
1568
1569 g_free(watchpoint);
1570 }
1571
1572 /* Remove all matching watchpoints. */
1573 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1574 {
1575 CPUWatchpoint *wp, *next;
1576
1577 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1578 if (wp->flags & mask)
1579 cpu_watchpoint_remove_by_ref(env, wp);
1580 }
1581 }
1582 #endif
1583
1584 /* Add a breakpoint. */
1585 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1586 CPUBreakpoint **breakpoint)
1587 {
1588 #if defined(TARGET_HAS_ICE)
1589 CPUBreakpoint *bp;
1590
1591 bp = g_malloc(sizeof(*bp));
1592
1593 bp->pc = pc;
1594 bp->flags = flags;
1595
1596 /* keep all GDB-injected breakpoints in front */
1597 if (flags & BP_GDB)
1598 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1599 else
1600 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1601
1602 breakpoint_invalidate(env, pc);
1603
1604 if (breakpoint)
1605 *breakpoint = bp;
1606 return 0;
1607 #else
1608 return -ENOSYS;
1609 #endif
1610 }
1611
1612 /* Remove a specific breakpoint. */
1613 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1614 {
1615 #if defined(TARGET_HAS_ICE)
1616 CPUBreakpoint *bp;
1617
1618 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1619 if (bp->pc == pc && bp->flags == flags) {
1620 cpu_breakpoint_remove_by_ref(env, bp);
1621 return 0;
1622 }
1623 }
1624 return -ENOENT;
1625 #else
1626 return -ENOSYS;
1627 #endif
1628 }
1629
1630 /* Remove a specific breakpoint by reference. */
1631 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1632 {
1633 #if defined(TARGET_HAS_ICE)
1634 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1635
1636 breakpoint_invalidate(env, breakpoint->pc);
1637
1638 g_free(breakpoint);
1639 #endif
1640 }
1641
1642 /* Remove all matching breakpoints. */
1643 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1644 {
1645 #if defined(TARGET_HAS_ICE)
1646 CPUBreakpoint *bp, *next;
1647
1648 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1649 if (bp->flags & mask)
1650 cpu_breakpoint_remove_by_ref(env, bp);
1651 }
1652 #endif
1653 }
1654
1655 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1656 CPU loop after each instruction */
1657 void cpu_single_step(CPUArchState *env, int enabled)
1658 {
1659 #if defined(TARGET_HAS_ICE)
1660 if (env->singlestep_enabled != enabled) {
1661 env->singlestep_enabled = enabled;
1662 if (kvm_enabled())
1663 kvm_update_guest_debug(env, 0);
1664 else {
1665 /* must flush all the translated code to avoid inconsistencies */
1666 /* XXX: only flush what is necessary */
1667 tb_flush(env);
1668 }
1669 }
1670 #endif
1671 }
1672
1673 /* enable or disable low levels log */
1674 void cpu_set_log(int log_flags)
1675 {
1676 loglevel = log_flags;
1677 if (loglevel && !logfile) {
1678 logfile = fopen(logfilename, log_append ? "a" : "w");
1679 if (!logfile) {
1680 perror(logfilename);
1681 _exit(1);
1682 }
1683 #if !defined(CONFIG_SOFTMMU)
1684 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1685 {
1686 static char logfile_buf[4096];
1687 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1688 }
1689 #elif defined(_WIN32)
1690 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1691 setvbuf(logfile, NULL, _IONBF, 0);
1692 #else
1693 setvbuf(logfile, NULL, _IOLBF, 0);
1694 #endif
1695 log_append = 1;
1696 }
1697 if (!loglevel && logfile) {
1698 fclose(logfile);
1699 logfile = NULL;
1700 }
1701 }
1702
1703 void cpu_set_log_filename(const char *filename)
1704 {
1705 logfilename = strdup(filename);
1706 if (logfile) {
1707 fclose(logfile);
1708 logfile = NULL;
1709 }
1710 cpu_set_log(loglevel);
1711 }
1712
1713 static void cpu_unlink_tb(CPUArchState *env)
1714 {
1715 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1716 problem and hope the cpu will stop of its own accord. For userspace
1717 emulation this often isn't actually as bad as it sounds. Often
1718 signals are used primarily to interrupt blocking syscalls. */
1719 TranslationBlock *tb;
1720 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1721
1722 spin_lock(&interrupt_lock);
1723 tb = env->current_tb;
1724 /* if the cpu is currently executing code, we must unlink it and
1725 all the potentially executing TB */
1726 if (tb) {
1727 env->current_tb = NULL;
1728 tb_reset_jump_recursive(tb);
1729 }
1730 spin_unlock(&interrupt_lock);
1731 }
1732
1733 #ifndef CONFIG_USER_ONLY
1734 /* mask must never be zero, except for A20 change call */
1735 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1736 {
1737 int old_mask;
1738
1739 old_mask = env->interrupt_request;
1740 env->interrupt_request |= mask;
1741
1742 /*
1743 * If called from iothread context, wake the target cpu in
1744 * case its halted.
1745 */
1746 if (!qemu_cpu_is_self(env)) {
1747 qemu_cpu_kick(env);
1748 return;
1749 }
1750
1751 if (use_icount) {
1752 env->icount_decr.u16.high = 0xffff;
1753 if (!can_do_io(env)
1754 && (mask & ~old_mask) != 0) {
1755 cpu_abort(env, "Raised interrupt while not in I/O function");
1756 }
1757 } else {
1758 cpu_unlink_tb(env);
1759 }
1760 }
1761
1762 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1763
1764 #else /* CONFIG_USER_ONLY */
1765
1766 void cpu_interrupt(CPUArchState *env, int mask)
1767 {
1768 env->interrupt_request |= mask;
1769 cpu_unlink_tb(env);
1770 }
1771 #endif /* CONFIG_USER_ONLY */
1772
1773 void cpu_reset_interrupt(CPUArchState *env, int mask)
1774 {
1775 env->interrupt_request &= ~mask;
1776 }
1777
/* Set the CPU's exit_request flag and unchain the TB it is executing. */
void cpu_exit(CPUArchState *env)
{
    env->exit_request = 1;
    cpu_unlink_tb(env);
}
1783
/* Table of selectable log items: mask bit, name and help text.
   The list ends with an entry whose mask is 0. */
const CPULogItem cpu_log_items[] = {
    { CPU_LOG_TB_OUT_ASM, "out_asm",
      "show generated host assembly code for each compiled TB" },
    { CPU_LOG_TB_IN_ASM, "in_asm",
      "show target assembly code for each compiled TB" },
    { CPU_LOG_TB_OP, "op",
      "show micro ops for each compiled TB" },
    { CPU_LOG_TB_OP_OPT, "op_opt",
      "show micro ops "
#ifdef TARGET_I386
      "before eflags optimization and "
#endif
      "after liveness analysis" },
    { CPU_LOG_INT, "int",
      "show interrupts/exceptions in short format" },
    { CPU_LOG_EXEC, "exec",
      "show trace before each executed TB (lots of logs)" },
    { CPU_LOG_TB_CPU, "cpu",
      "show CPU state before block translation" },
#ifdef TARGET_I386
    { CPU_LOG_PCALL, "pcall",
      "show protected mode far calls/returns/exceptions" },
    { CPU_LOG_RESET, "cpu_reset",
      "show CPU state before CPU resets" },
#endif
#ifdef DEBUG_IOPORT
    { CPU_LOG_IOPORT, "ioport",
      "show all i/o ports accesses" },
#endif
    /* terminator */
    { 0, NULL, NULL },
};
1815
/* Return 1 if the first n bytes of s1 are exactly the string s2
   (i.e. s2 has length n and the bytes match), 0 otherwise. */
static int cmp1(const char *s1, int n, const char *s2)
{
    return strlen(s2) == n && memcmp(s1, s2, n) == 0;
}
1822
1823 /* takes a comma separated list of log masks. Return 0 if error. */
1824 int cpu_str_to_log_mask(const char *str)
1825 {
1826 const CPULogItem *item;
1827 int mask;
1828 const char *p, *p1;
1829
1830 p = str;
1831 mask = 0;
1832 for(;;) {
1833 p1 = strchr(p, ',');
1834 if (!p1)
1835 p1 = p + strlen(p);
1836 if(cmp1(p,p1-p,"all")) {
1837 for(item = cpu_log_items; item->mask != 0; item++) {
1838 mask |= item->mask;
1839 }
1840 } else {
1841 for(item = cpu_log_items; item->mask != 0; item++) {
1842 if (cmp1(p, p1 - p, item->name))
1843 goto found;
1844 }
1845 return 0;
1846 }
1847 found:
1848 mask |= item->mask;
1849 if (*p1 != ',')
1850 break;
1851 p = p1 + 1;
1852 }
1853 return mask;
1854 }
1855
1856 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1857 {
1858 va_list ap;
1859 va_list ap2;
1860
1861 va_start(ap, fmt);
1862 va_copy(ap2, ap);
1863 fprintf(stderr, "qemu: fatal: ");
1864 vfprintf(stderr, fmt, ap);
1865 fprintf(stderr, "\n");
1866 #ifdef TARGET_I386
1867 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1868 #else
1869 cpu_dump_state(env, stderr, fprintf, 0);
1870 #endif
1871 if (qemu_log_enabled()) {
1872 qemu_log("qemu: fatal: ");
1873 qemu_log_vprintf(fmt, ap2);
1874 qemu_log("\n");
1875 #ifdef TARGET_I386
1876 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1877 #else
1878 log_cpu_state(env, 0);
1879 #endif
1880 qemu_log_flush();
1881 qemu_log_close();
1882 }
1883 va_end(ap2);
1884 va_end(ap);
1885 #if defined(CONFIG_USER_ONLY)
1886 {
1887 struct sigaction act;
1888 sigfillset(&act.sa_mask);
1889 act.sa_handler = SIG_DFL;
1890 sigaction(SIGABRT, &act, NULL);
1891 }
1892 #endif
1893 abort();
1894 }
1895
1896 CPUArchState *cpu_copy(CPUArchState *env)
1897 {
1898 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1899 CPUArchState *next_cpu = new_env->next_cpu;
1900 int cpu_index = new_env->cpu_index;
1901 #if defined(TARGET_HAS_ICE)
1902 CPUBreakpoint *bp;
1903 CPUWatchpoint *wp;
1904 #endif
1905
1906 memcpy(new_env, env, sizeof(CPUArchState));
1907
1908 /* Preserve chaining and index. */
1909 new_env->next_cpu = next_cpu;
1910 new_env->cpu_index = cpu_index;
1911
1912 /* Clone all break/watchpoints.
1913 Note: Once we support ptrace with hw-debug register access, make sure
1914 BP_CPU break/watchpoints are handled correctly on clone. */
1915 QTAILQ_INIT(&env->breakpoints);
1916 QTAILQ_INIT(&env->watchpoints);
1917 #if defined(TARGET_HAS_ICE)
1918 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1919 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1920 }
1921 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1922 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1923 wp->flags, NULL);
1924 }
1925 #endif
1926
1927 return new_env;
1928 }
1929
1930 #if !defined(CONFIG_USER_ONLY)
1931 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1932 {
1933 unsigned int i;
1934
1935 /* Discard jump cache entries for any tb which might potentially
1936 overlap the flushed page. */
1937 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1938 memset (&env->tb_jmp_cache[i], 0,
1939 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1940
1941 i = tb_jmp_cache_hash_page(addr);
1942 memset (&env->tb_jmp_cache[i], 0,
1943 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1944 }
1945
1946 /* Note: start and end must be within the same ram block. */
1947 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1948 int dirty_flags)
1949 {
1950 uintptr_t length, start1;
1951
1952 start &= TARGET_PAGE_MASK;
1953 end = TARGET_PAGE_ALIGN(end);
1954
1955 length = end - start;
1956 if (length == 0)
1957 return;
1958 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1959
1960 /* we modify the TLB cache so that the dirty bit will be set again
1961 when accessing the range */
1962 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1963 /* Check that we don't span multiple blocks - this breaks the
1964 address comparisons below. */
1965 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1966 != (end - 1) - start) {
1967 abort();
1968 }
1969 cpu_tlb_reset_dirty_all(start1, length);
1970 }
1971
1972 int cpu_physical_memory_set_dirty_tracking(int enable)
1973 {
1974 int ret = 0;
1975 in_migration = enable;
1976 return ret;
1977 }
1978
1979 target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
1980 MemoryRegionSection *section,
1981 target_ulong vaddr,
1982 target_phys_addr_t paddr,
1983 int prot,
1984 target_ulong *address)
1985 {
1986 target_phys_addr_t iotlb;
1987 CPUWatchpoint *wp;
1988
1989 if (memory_region_is_ram(section->mr)) {
1990 /* Normal RAM. */
1991 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1992 + memory_region_section_addr(section, paddr);
1993 if (!section->readonly) {
1994 iotlb |= phys_section_notdirty;
1995 } else {
1996 iotlb |= phys_section_rom;
1997 }
1998 } else {
1999 /* IO handlers are currently passed a physical address.
2000 It would be nice to pass an offset from the base address
2001 of that region. This would avoid having to special case RAM,
2002 and avoid full address decoding in every device.
2003 We can't use the high bits of pd for this because
2004 IO_MEM_ROMD uses these as a ram address. */
2005 iotlb = section - phys_sections;
2006 iotlb += memory_region_section_addr(section, paddr);
2007 }
2008
2009 /* Make accesses to pages with watchpoints go via the
2010 watchpoint trap routines. */
2011 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2012 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2013 /* Avoid trapping reads of pages with a write breakpoint. */
2014 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2015 iotlb = phys_section_watch + paddr;
2016 *address |= TLB_MMIO;
2017 break;
2018 }
2019 }
2020 }
2021
2022 return iotlb;
2023 }
2024
2025 #else
/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
 */

/* State carried through the walk. */
struct walk_memory_regions_data
{
    walk_memory_regions_fn fn;  /* callback invoked for each region */
    void *priv;                 /* opaque pointer passed to fn */
    uintptr_t start;            /* start of the open region, -1ul if none */
    int prot;                   /* page flags of the open region */
};
2038
2039 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2040 abi_ulong end, int new_prot)
2041 {
2042 if (data->start != -1ul) {
2043 int rc = data->fn(data->priv, data->start, end, data->prot);
2044 if (rc != 0) {
2045 return rc;
2046 }
2047 }
2048
2049 data->start = (new_prot ? end : -1ul);
2050 data->prot = new_prot;
2051
2052 return 0;
2053 }
2054
2055 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2056 abi_ulong base, int level, void **lp)
2057 {
2058 abi_ulong pa;
2059 int i, rc;
2060
2061 if (*lp == NULL) {
2062 return walk_memory_regions_end(data, base, 0);
2063 }
2064
2065 if (level == 0) {
2066 PageDesc *pd = *lp;
2067 for (i = 0; i < L2_SIZE; ++i) {
2068 int prot = pd[i].flags;
2069
2070 pa = base | (i << TARGET_PAGE_BITS);
2071 if (prot != data->prot) {
2072 rc = walk_memory_regions_end(data, pa, prot);
2073 if (rc != 0) {
2074 return rc;
2075 }
2076 }
2077 }
2078 } else {
2079 void **pp = *lp;
2080 for (i = 0; i < L2_SIZE; ++i) {
2081 pa = base | ((abi_ulong)i <<
2082 (TARGET_PAGE_BITS + L2_BITS * level));
2083 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2084 if (rc != 0) {
2085 return rc;
2086 }
2087 }
2088 }
2089
2090 return 0;
2091 }
2092
2093 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2094 {
2095 struct walk_memory_regions_data data;
2096 uintptr_t i;
2097
2098 data.fn = fn;
2099 data.priv = priv;
2100 data.start = -1ul;
2101 data.prot = 0;
2102
2103 for (i = 0; i < V_L1_SIZE; i++) {
2104 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2105 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2106 if (rc != 0) {
2107 return rc;
2108 }
2109 }
2110
2111 return walk_memory_regions_end(&data, 0, 0);
2112 }
2113
2114 static int dump_region(void *priv, abi_ulong start,
2115 abi_ulong end, unsigned long prot)
2116 {
2117 FILE *f = (FILE *)priv;
2118
2119 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2120 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2121 start, end, end - start,
2122 ((prot & PAGE_READ) ? 'r' : '-'),
2123 ((prot & PAGE_WRITE) ? 'w' : '-'),
2124 ((prot & PAGE_EXEC) ? 'x' : '-'));
2125
2126 return (0);
2127 }
2128
2129 /* dump memory mappings */
2130 void page_dump(FILE *f)
2131 {
2132 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2133 "start", "end", "size", "prot");
2134 walk_memory_regions(f, dump_region);
2135 }
2136
2137 int page_get_flags(target_ulong address)
2138 {
2139 PageDesc *p;
2140
2141 p = page_find(address >> TARGET_PAGE_BITS);
2142 if (!p)
2143 return 0;
2144 return p->flags;
2145 }
2146
2147 /* Modify the flags of a page and invalidate the code if necessary.
2148 The flag PAGE_WRITE_ORG is positioned automatically depending
2149 on PAGE_WRITE. The mmap_lock should already be held. */
2150 void page_set_flags(target_ulong start, target_ulong end, int flags)
2151 {
2152 target_ulong addr, len;
2153
2154 /* This function should never be called with addresses outside the
2155 guest address space. If this assert fires, it probably indicates
2156 a missing call to h2g_valid. */
2157 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2158 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2159 #endif
2160 assert(start < end);
2161
2162 start = start & TARGET_PAGE_MASK;
2163 end = TARGET_PAGE_ALIGN(end);
2164
2165 if (flags & PAGE_WRITE) {
2166 flags |= PAGE_WRITE_ORG;
2167 }
2168
2169 for (addr = start, len = end - start;
2170 len != 0;
2171 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2172 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2173
2174 /* If the write protection bit is set, then we invalidate
2175 the code inside. */
2176 if (!(p->flags & PAGE_WRITE) &&
2177 (flags & PAGE_WRITE) &&
2178 p->first_tb) {
2179 tb_invalidate_phys_page(addr, 0, NULL);
2180 }
2181 p->flags = flags;
2182 }
2183 }
2184
2185 int page_check_range(target_ulong start, target_ulong len, int flags)
2186 {
2187 PageDesc *p;
2188 target_ulong end;
2189 target_ulong addr;
2190
2191 /* This function should never be called with addresses outside the
2192 guest address space. If this assert fires, it probably indicates
2193 a missing call to h2g_valid. */
2194 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2195 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2196 #endif
2197
2198 if (len == 0) {
2199 return 0;
2200 }
2201 if (start + len - 1 < start) {
2202 /* We've wrapped around. */
2203 return -1;
2204 }
2205
2206 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2207 start = start & TARGET_PAGE_MASK;
2208
2209 for (addr = start, len = end - start;
2210 len != 0;
2211 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2212 p = page_find(addr >> TARGET_PAGE_BITS);
2213 if( !p )
2214 return -1;
2215 if( !(p->flags & PAGE_VALID) )
2216 return -1;
2217
2218 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2219 return -1;
2220 if (flags & PAGE_WRITE) {
2221 if (!(p->flags & PAGE_WRITE_ORG))
2222 return -1;
2223 /* unprotect the page if it was put read-only because it
2224 contains translated code */
2225 if (!(p->flags & PAGE_WRITE)) {
2226 if (!page_unprotect(addr, 0, NULL))
2227 return -1;
2228 }
2229 return 0;
2230 }
2231 }
2232 return 0;
2233 }
2234
/* called from signal handler: invalidate the code and unprotect the
   page. Return TRUE if the fault was successfully handled.

   'address' is the faulting guest address; 'pc' and 'puc' are passed on
   to tb_invalidate_phys_page().  Returns 1 when the page was originally
   writable and has been made writable again, 0 when the page has no
   descriptor or was not write-protected by the translator. */
int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
{
    unsigned int prot;
    PageDesc *p;
    target_ulong host_start, host_end, addr;

    /* Technically this isn't safe inside a signal handler.  However we
       know this only ever happens in a synchronous SEGV handler, so in
       practice it seems to be ok.  */
    mmap_lock();

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p) {
        mmap_unlock();
        return 0;
    }

    /* if the page was really writable, then we change its
       protection back to writable */
    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
        /* work on the whole host page, which may hold several target
           pages */
        host_start = address & qemu_host_page_mask;
        host_end = host_start + qemu_host_page_size;

        prot = 0;
        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
            /* NOTE(review): the page_find() result is dereferenced
               without a NULL check - presumably every target page of
               this host page has a descriptor; confirm */
            p = page_find(addr >> TARGET_PAGE_BITS);
            p->flags |= PAGE_WRITE;
            prot |= p->flags;

            /* and since the content will be modified, we must invalidate
               the corresponding translated code. */
            tb_invalidate_phys_page(addr, pc, puc);
#ifdef DEBUG_TB_CHECK
            tb_invalidate_check(addr);
#endif
        }
        /* apply the union of the target pages' flags to the host page */
        mprotect((void *)g2h(host_start), qemu_host_page_size,
                 prot & PAGE_BITS);

        mmap_unlock();
        return 1;
    }
    mmap_unlock();
    return 0;
}
2282 #endif /* defined(CONFIG_USER_ONLY) */
2283
2284 #if !defined(CONFIG_USER_ONLY)
2285
2286 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2287 typedef struct subpage_t {
2288 MemoryRegion iomem;
2289 target_phys_addr_t base;
2290 uint16_t sub_section[TARGET_PAGE_SIZE];
2291 } subpage_t;
2292
2293 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2294 uint16_t section);
2295 static subpage_t *subpage_init(target_phys_addr_t base);
2296 static void destroy_page_desc(uint16_t section_index)
2297 {
2298 MemoryRegionSection *section = &phys_sections[section_index];
2299 MemoryRegion *mr = section->mr;
2300
2301 if (mr->subpage) {
2302 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2303 memory_region_destroy(&subpage->iomem);
2304 g_free(subpage);
2305 }
2306 }
2307
2308 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2309 {
2310 unsigned i;
2311 PhysPageEntry *p;
2312
2313 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2314 return;
2315 }
2316
2317 p = phys_map_nodes[lp->ptr];
2318 for (i = 0; i < L2_SIZE; ++i) {
2319 if (!p[i].is_leaf) {
2320 destroy_l2_mapping(&p[i], level - 1);
2321 } else {
2322 destroy_page_desc(p[i].ptr);
2323 }
2324 }
2325 lp->is_leaf = 0;
2326 lp->ptr = PHYS_MAP_NODE_NIL;
2327 }
2328
2329 static void destroy_all_mappings(void)
2330 {
2331 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2332 phys_map_nodes_reset();
2333 }
2334
2335 static uint16_t phys_section_add(MemoryRegionSection *section)
2336 {
2337 if (phys_sections_nb == phys_sections_nb_alloc) {
2338 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2339 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2340 phys_sections_nb_alloc);
2341 }
2342 phys_sections[phys_sections_nb] = *section;
2343 return phys_sections_nb++;
2344 }
2345
2346 static void phys_sections_clear(void)
2347 {
2348 phys_sections_nb = 0;
2349 }
2350
2351 /* register physical memory.
2352 For RAM, 'size' must be a multiple of the target page size.
2353 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2354 io memory page. The address used when calling the IO function is
2355 the offset from the start of the region, plus region_offset. Both
2356 start_addr and region_offset are rounded down to a page boundary
2357 before calculating this offset. This should not be a problem unless
2358 the low bits of start_addr and region_offset differ. */
2359 static void register_subpage(MemoryRegionSection *section)
2360 {
2361 subpage_t *subpage;
2362 target_phys_addr_t base = section->offset_within_address_space
2363 & TARGET_PAGE_MASK;
2364 MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2365 MemoryRegionSection subsection = {
2366 .offset_within_address_space = base,
2367 .size = TARGET_PAGE_SIZE,
2368 };
2369 target_phys_addr_t start, end;
2370
2371 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2372
2373 if (!(existing->mr->subpage)) {
2374 subpage = subpage_init(base);
2375 subsection.mr = &subpage->iomem;
2376 phys_page_set(base >> TARGET_PAGE_BITS, 1,
2377 phys_section_add(&subsection));
2378 } else {
2379 subpage = container_of(existing->mr, subpage_t, iomem);
2380 }
2381 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2382 end = start + section->size;
2383 subpage_register(subpage, start, end, phys_section_add(section));
2384 }
2385
2386
2387 static void register_multipage(MemoryRegionSection *section)
2388 {
2389 target_phys_addr_t start_addr = section->offset_within_address_space;
2390 ram_addr_t size = section->size;
2391 target_phys_addr_t addr;
2392 uint16_t section_index = phys_section_add(section);
2393
2394 assert(size);
2395
2396 addr = start_addr;
2397 phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2398 section_index);
2399 }
2400
2401 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2402 bool readonly)
2403 {
2404 MemoryRegionSection now = *section, remain = *section;
2405
2406 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2407 || (now.size < TARGET_PAGE_SIZE)) {
2408 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2409 - now.offset_within_address_space,
2410 now.size);
2411 register_subpage(&now);
2412 remain.size -= now.size;
2413 remain.offset_within_address_space += now.size;
2414 remain.offset_within_region += now.size;
2415 }
2416 now = remain;
2417 now.size &= TARGET_PAGE_MASK;
2418 if (now.size) {
2419 register_multipage(&now);
2420 remain.size -= now.size;
2421 remain.offset_within_address_space += now.size;
2422 remain.offset_within_region += now.size;
2423 }
2424 now = remain;
2425 if (now.size) {
2426 register_subpage(&now);
2427 }
2428 }
2429
2430
2431 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2432 {
2433 if (kvm_enabled())
2434 kvm_coalesce_mmio_region(addr, size);
2435 }
2436
2437 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2438 {
2439 if (kvm_enabled())
2440 kvm_uncoalesce_mmio_region(addr, size);
2441 }
2442
/* Ask KVM to flush its coalesced-MMIO buffer; does nothing when KVM is not
   enabled.  */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2448
2449 #if defined(__linux__) && !defined(TARGET_S390X)
2450
2451 #include <sys/vfs.h>
2452
2453 #define HUGETLBFS_MAGIC 0x958458f6
2454
2455 static long gethugepagesize(const char *path)
2456 {
2457 struct statfs fs;
2458 int ret;
2459
2460 do {
2461 ret = statfs(path, &fs);
2462 } while (ret != 0 && errno == EINTR);
2463
2464 if (ret != 0) {
2465 perror(path);
2466 return 0;
2467 }
2468
2469 if (fs.f_type != HUGETLBFS_MAGIC)
2470 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2471
2472 return fs.f_bsize;
2473 }
2474
2475 static void *file_ram_alloc(RAMBlock *block,
2476 ram_addr_t memory,
2477 const char *path)
2478 {
2479 char *filename;
2480 void *area;
2481 int fd;
2482 #ifdef MAP_POPULATE
2483 int flags;
2484 #endif
2485 unsigned long hpagesize;
2486
2487 hpagesize = gethugepagesize(path);
2488 if (!hpagesize) {
2489 return NULL;
2490 }
2491
2492 if (memory < hpagesize) {
2493 return NULL;
2494 }
2495
2496 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2497 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2498 return NULL;
2499 }
2500
2501 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2502 return NULL;
2503 }
2504
2505 fd = mkstemp(filename);
2506 if (fd < 0) {
2507 perror("unable to create backing store for hugepages");
2508 free(filename);
2509 return NULL;
2510 }
2511 unlink(filename);
2512 free(filename);
2513
2514 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2515
2516 /*
2517 * ftruncate is not supported by hugetlbfs in older
2518 * hosts, so don't bother bailing out on errors.
2519 * If anything goes wrong with it under other filesystems,
2520 * mmap will fail.
2521 */
2522 if (ftruncate(fd, memory))
2523 perror("ftruncate");
2524
2525 #ifdef MAP_POPULATE
2526 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2527 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2528 * to sidestep this quirk.
2529 */
2530 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2531 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2532 #else
2533 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2534 #endif
2535 if (area == MAP_FAILED) {
2536 perror("file_ram_alloc: can't mmap RAM pages");
2537 close(fd);
2538 return (NULL);
2539 }
2540 block->fd = fd;
2541 return area;
2542 }
2543 #endif
2544
2545 static ram_addr_t find_ram_offset(ram_addr_t size)
2546 {
2547 RAMBlock *block, *next_block;
2548 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2549
2550 if (QLIST_EMPTY(&ram_list.blocks))
2551 return 0;
2552
2553 QLIST_FOREACH(block, &ram_list.blocks, next) {
2554 ram_addr_t end, next = RAM_ADDR_MAX;
2555
2556 end = block->offset + block->length;
2557
2558 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2559 if (next_block->offset >= end) {
2560 next = MIN(next, next_block->offset);
2561 }
2562 }
2563 if (next - end >= size && next - end < mingap) {
2564 offset = end;
2565 mingap = next - end;
2566 }
2567 }
2568
2569 if (offset == RAM_ADDR_MAX) {
2570 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2571 (uint64_t)size);
2572 abort();
2573 }
2574
2575 return offset;
2576 }
2577
2578 static ram_addr_t last_ram_offset(void)
2579 {
2580 RAMBlock *block;
2581 ram_addr_t last = 0;
2582
2583 QLIST_FOREACH(block, &ram_list.blocks, next)
2584 last = MAX(last, block->offset + block->length);
2585
2586 return last;
2587 }
2588
2589 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2590 {
2591 RAMBlock *new_block, *block;
2592
2593 new_block = NULL;
2594 QLIST_FOREACH(block, &ram_list.blocks, next) {
2595 if (block->offset == addr) {
2596 new_block = block;
2597 break;
2598 }
2599 }
2600 assert(new_block);
2601 assert(!new_block->idstr[0]);
2602
2603 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2604 char *id = dev->parent_bus->info->get_dev_path(dev);
2605 if (id) {
2606 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2607 g_free(id);
2608 }
2609 }
2610 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2611
2612 QLIST_FOREACH(block, &ram_list.blocks, next) {
2613 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2614 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2615 new_block->idstr);
2616 abort();
2617 }
2618 }
2619 }
2620
2621 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2622 MemoryRegion *mr)
2623 {
2624 RAMBlock *new_block;
2625
2626 size = TARGET_PAGE_ALIGN(size);
2627 new_block = g_malloc0(sizeof(*new_block));
2628
2629 new_block->mr = mr;
2630 new_block->offset = find_ram_offset(size);
2631 if (host) {
2632 new_block->host = host;
2633 new_block->flags |= RAM_PREALLOC_MASK;
2634 } else {
2635 if (mem_path) {
2636 #if defined (__linux__) && !defined(TARGET_S390X)
2637 new_block->host = file_ram_alloc(new_block, size, mem_path);
2638 if (!new_block->host) {
2639 new_block->host = qemu_vmalloc(size);
2640 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2641 }
2642 #else
2643 fprintf(stderr, "-mem-path option unsupported\n");
2644 exit(1);
2645 #endif
2646 } else {
2647 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2648 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2649 an system defined value, which is at least 256GB. Larger systems
2650 have larger values. We put the guest between the end of data
2651 segment (system break) and this value. We use 32GB as a base to
2652 have enough room for the system break to grow. */
2653 new_block->host = mmap((void*)0x800000000, size,
2654 PROT_EXEC|PROT_READ|PROT_WRITE,
2655 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2656 if (new_block->host == MAP_FAILED) {
2657 fprintf(stderr, "Allocating RAM failed\n");
2658 abort();
2659 }
2660 #else
2661 if (xen_enabled()) {
2662 xen_ram_alloc(new_block->offset, size, mr);
2663 } else {
2664 new_block->host = qemu_vmalloc(size);
2665 }
2666 #endif
2667 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2668 }
2669 }
2670 new_block->length = size;
2671
2672 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2673
2674 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2675 last_ram_offset() >> TARGET_PAGE_BITS);
2676 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2677 0xff, size >> TARGET_PAGE_BITS);
2678
2679 if (kvm_enabled())
2680 kvm_setup_guest_memory(new_block->host, size);
2681
2682 return new_block->offset;
2683 }
2684
2685 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2686 {
2687 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2688 }
2689
2690 void qemu_ram_free_from_ptr(ram_addr_t addr)
2691 {
2692 RAMBlock *block;
2693
2694 QLIST_FOREACH(block, &ram_list.blocks, next) {
2695 if (addr == block->offset) {
2696 QLIST_REMOVE(block, next);
2697 g_free(block);
2698 return;
2699 }
2700 }
2701 }
2702
2703 void qemu_ram_free(ram_addr_t addr)
2704 {
2705 RAMBlock *block;
2706
2707 QLIST_FOREACH(block, &ram_list.blocks, next) {
2708 if (addr == block->offset) {
2709 QLIST_REMOVE(block, next);
2710 if (block->flags & RAM_PREALLOC_MASK) {
2711 ;
2712 } else if (mem_path) {
2713 #if defined (__linux__) && !defined(TARGET_S390X)
2714 if (block->fd) {
2715 munmap(block->host, block->length);
2716 close(block->fd);
2717 } else {
2718 qemu_vfree(block->host);
2719 }
2720 #else
2721 abort();
2722 #endif
2723 } else {
2724 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2725 munmap(block->host, block->length);
2726 #else
2727 if (xen_enabled()) {
2728 xen_invalidate_map_cache_entry(block->host);
2729 } else {
2730 qemu_vfree(block->host);
2731 }
2732 #endif
2733 }
2734 g_free(block);
2735 return;
2736 }
2737 }
2738
2739 }
2740
2741 #ifndef _WIN32
2742 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2743 {
2744 RAMBlock *block;
2745 ram_addr_t offset;
2746 int flags;
2747 void *area, *vaddr;
2748
2749 QLIST_FOREACH(block, &ram_list.blocks, next) {
2750 offset = addr - block->offset;
2751 if (offset < block->length) {
2752 vaddr = block->host + offset;
2753 if (block->flags & RAM_PREALLOC_MASK) {
2754 ;
2755 } else {
2756 flags = MAP_FIXED;
2757 munmap(vaddr, length);
2758 if (mem_path) {
2759 #if defined(__linux__) && !defined(TARGET_S390X)
2760 if (block->fd) {
2761 #ifdef MAP_POPULATE
2762 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2763 MAP_PRIVATE;
2764 #else
2765 flags |= MAP_PRIVATE;
2766 #endif
2767 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2768 flags, block->fd, offset);
2769 } else {
2770 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2771 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2772 flags, -1, 0);
2773 }
2774 #else
2775 abort();
2776 #endif
2777 } else {
2778 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2779 flags |= MAP_SHARED | MAP_ANONYMOUS;
2780 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2781 flags, -1, 0);
2782 #else
2783 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2784 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2785 flags, -1, 0);
2786 #endif
2787 }
2788 if (area != vaddr) {
2789 fprintf(stderr, "Could not remap addr: "
2790 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2791 length, addr);
2792 exit(1);
2793 }
2794 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
2795 }
2796 return;
2797 }
2798 }
2799 }
2800 #endif /* !_WIN32 */
2801
2802 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2803 With the exception of the softmmu code in this file, this should
2804 only be used for local memory (e.g. video ram) that the device owns,
2805 and knows it isn't going to access beyond the end of the block.
2806
2807 It should not be used for general purpose DMA.
2808 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2809 */
2810 void *qemu_get_ram_ptr(ram_addr_t addr)
2811 {
2812 RAMBlock *block;
2813
2814 QLIST_FOREACH(block, &ram_list.blocks, next) {
2815 if (addr - block->offset < block->length) {
2816 /* Move this entry to to start of the list. */
2817 if (block != QLIST_FIRST(&ram_list.blocks)) {
2818 QLIST_REMOVE(block, next);
2819 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2820 }
2821 if (xen_enabled()) {
2822 /* We need to check if the requested address is in the RAM
2823 * because we don't want to map the entire memory in QEMU.
2824 * In that case just map until the end of the page.
2825 */
2826 if (block->offset == 0) {
2827 return xen_map_cache(addr, 0, 0);
2828 } else if (block->host == NULL) {
2829 block->host =
2830 xen_map_cache(block->offset, block->length, 1);
2831 }
2832 }
2833 return block->host + (addr - block->offset);
2834 }
2835 }
2836
2837 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2838 abort();
2839
2840 return NULL;
2841 }
2842
2843 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2844 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2845 */
2846 void *qemu_safe_ram_ptr(ram_addr_t addr)
2847 {
2848 RAMBlock *block;
2849
2850 QLIST_FOREACH(block, &ram_list.blocks, next) {
2851 if (addr - block->offset < block->length) {
2852 if (xen_enabled()) {
2853 /* We need to check if the requested address is in the RAM
2854 * because we don't want to map the entire memory in QEMU.
2855 * In that case just map until the end of the page.
2856 */
2857 if (block->offset == 0) {
2858 return xen_map_cache(addr, 0, 0);
2859 } else if (block->host == NULL) {
2860 block->host =
2861 xen_map_cache(block->offset, block->length, 1);
2862 }
2863 }
2864 return block->host + (addr - block->offset);
2865 }
2866 }
2867
2868 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2869 abort();
2870
2871 return NULL;
2872 }
2873
2874 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2875 * but takes a size argument */
2876 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2877 {
2878 if (*size == 0) {
2879 return NULL;
2880 }
2881 if (xen_enabled()) {
2882 return xen_map_cache(addr, *size, 1);
2883 } else {
2884 RAMBlock *block;
2885
2886 QLIST_FOREACH(block, &ram_list.blocks, next) {
2887 if (addr - block->offset < block->length) {
2888 if (addr - block->offset + *size > block->length)
2889 *size = block->length - addr + block->offset;
2890 return block->host + (addr - block->offset);
2891 }
2892 }
2893
2894 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2895 abort();
2896 }
2897 }
2898
/* Counterpart of the ram pointer getters; here it only emits a trace
   event for 'addr'.  */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
2903
2904 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2905 {
2906 RAMBlock *block;
2907 uint8_t *host = ptr;
2908
2909 if (xen_enabled()) {
2910 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2911 return 0;
2912 }
2913
2914 QLIST_FOREACH(block, &ram_list.blocks, next) {
2915 /* This case append when the block is not mapped. */
2916 if (block->host == NULL) {
2917 continue;
2918 }
2919 if (host - block->host < block->length) {
2920 *ram_addr = block->offset + (host - block->host);
2921 return 0;
2922 }
2923 }
2924
2925 return -1;
2926 }
2927
2928 /* Some of the softmmu routines need to translate from a host pointer
2929 (typically a TLB entry) back to a ram offset. */
2930 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2931 {
2932 ram_addr_t ram_addr;
2933
2934 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2935 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2936 abort();
2937 }
2938 return ram_addr;
2939 }
2940
2941 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
2942 unsigned size)
2943 {
2944 #ifdef DEBUG_UNASSIGNED
2945 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2946 #endif
2947 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2948 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2949 #endif
2950 return 0;
2951 }
2952
2953 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
2954 uint64_t val, unsigned size)
2955 {
2956 #ifdef DEBUG_UNASSIGNED
2957 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2958 #endif
2959 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2960 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2961 #endif
2962 }
2963
2964 static const MemoryRegionOps unassigned_mem_ops = {
2965 .read = unassigned_mem_read,
2966 .write = unassigned_mem_write,
2967 .endianness = DEVICE_NATIVE_ENDIAN,
2968 };
2969
2970 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
2971 unsigned size)
2972 {
2973 abort();
2974 }
2975
2976 static void error_mem_write(void *opaque, target_phys_addr_t addr,
2977 uint64_t value, unsigned size)
2978 {
2979 abort();
2980 }
2981
2982 static const MemoryRegionOps error_mem_ops = {
2983 .read = error_mem_read,
2984 .write = error_mem_write,
2985 .endianness = DEVICE_NATIVE_ENDIAN,
2986 };
2987
2988 static const MemoryRegionOps rom_mem_ops = {
2989 .read = error_mem_read,
2990 .write = unassigned_mem_write,
2991 .endianness = DEVICE_NATIVE_ENDIAN,
2992 };
2993
2994 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
2995 uint64_t val, unsigned size)
2996 {
2997 int dirty_flags;
2998 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2999 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3000 #if !defined(CONFIG_USER_ONLY)
3001 tb_invalidate_phys_page_fast(ram_addr, size);
3002 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3003 #endif
3004 }
3005 switch (size) {
3006 case 1:
3007 stb_p(qemu_get_ram_ptr(ram_addr), val);
3008 break;
3009 case 2:
3010 stw_p(qemu_get_ram_ptr(ram_addr), val);
3011 break;
3012 case 4:
3013 stl_p(qemu_get_ram_ptr(ram_addr), val);
3014 break;
3015 default:
3016 abort();
3017 }
3018 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3019 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3020 /* we remove the notdirty callback only if the code has been
3021 flushed */
3022 if (dirty_flags == 0xff)
3023 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3024 }
3025
3026 static const MemoryRegionOps notdirty_mem_ops = {
3027 .read = error_mem_read,
3028 .write = notdirty_mem_write,
3029 .endianness = DEVICE_NATIVE_ENDIAN,
3030 };
3031
3032 /* Generate a debug exception if a watchpoint has been hit. */
3033 static void check_watchpoint(int offset, int len_mask, int flags)
3034 {
3035 CPUArchState *env = cpu_single_env;
3036 target_ulong pc, cs_base;
3037 TranslationBlock *tb;
3038 target_ulong vaddr;
3039 CPUWatchpoint *wp;
3040 int cpu_flags;
3041
3042 if (env->watchpoint_hit) {
3043 /* We re-entered the check after replacing the TB. Now raise
3044 * the debug interrupt so that is will trigger after the
3045 * current instruction. */
3046 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3047 return;
3048 }
3049 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3050 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3051 if ((vaddr == (wp->vaddr & len_mask) ||
3052 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3053 wp->flags |= BP_WATCHPOINT_HIT;
3054 if (!env->watchpoint_hit) {
3055 env->watchpoint_hit = wp;
3056 tb = tb_find_pc(env->mem_io_pc);
3057 if (!tb) {
3058 cpu_abort(env, "check_watchpoint: could not find TB for "
3059 "pc=%p", (void *)env->mem_io_pc);
3060 }
3061 cpu_restore_state(tb, env, env->mem_io_pc);
3062 tb_phys_invalidate(tb, -1);
3063 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3064 env->exception_index = EXCP_DEBUG;
3065 cpu_loop_exit(env);
3066 } else {
3067 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3068 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3069 cpu_resume_from_signal(env, NULL);
3070 }
3071 }
3072 } else {
3073 wp->flags &= ~BP_WATCHPOINT_HIT;
3074 }
3075 }
3076 }
3077
3078 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3079 so these check for a hit then pass through to the normal out-of-line
3080 phys routines. */
3081 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
3082 unsigned size)
3083 {
3084 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3085 switch (size) {
3086 case 1: return ldub_phys(addr);
3087 case 2: return lduw_phys(addr);
3088 case 4: return ldl_phys(addr);
3089 default: abort();
3090 }
3091 }
3092
3093 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
3094 uint64_t val, unsigned size)
3095 {
3096 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3097 switch (size) {
3098 case 1:
3099 stb_phys(addr, val);
3100 break;
3101 case 2:
3102 stw_phys(addr, val);
3103 break;
3104 case 4:
3105 stl_phys(addr, val);
3106 break;
3107 default: abort();
3108 }
3109 }
3110
3111 static const MemoryRegionOps watch_mem_ops = {
3112 .read = watch_mem_read,
3113 .write = watch_mem_write,
3114 .endianness = DEVICE_NATIVE_ENDIAN,
3115 };
3116
3117 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3118 unsigned len)
3119 {
3120 subpage_t *mmio = opaque;
3121 unsigned int idx = SUBPAGE_IDX(addr);
3122 MemoryRegionSection *section;
3123 #if defined(DEBUG_SUBPAGE)
3124 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3125 mmio, len, addr, idx);
3126 #endif
3127
3128 section = &phys_sections[mmio->sub_section[idx]];
3129 addr += mmio->base;
3130 addr -= section->offset_within_address_space;
3131 addr += section->offset_within_region;
3132 return io_mem_read(section->mr, addr, len);
3133 }
3134
3135 static void subpage_write(void *opaque, target_phys_addr_t addr,
3136 uint64_t value, unsigned len)
3137 {
3138 subpage_t *mmio = opaque;
3139 unsigned int idx = SUBPAGE_IDX(addr);
3140 MemoryRegionSection *section;
3141 #if defined(DEBUG_SUBPAGE)
3142 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3143 " idx %d value %"PRIx64"\n",
3144 __func__, mmio, len, addr, idx, value);
3145 #endif
3146
3147 section = &phys_sections[mmio->sub_section[idx]];
3148 addr += mmio->base;
3149 addr -= section->offset_within_address_space;
3150 addr += section->offset_within_region;
3151 io_mem_write(section->mr, addr, value, len);
3152 }
3153
3154 static const MemoryRegionOps subpage_ops = {
3155 .read = subpage_read,
3156 .write = subpage_write,
3157 .endianness = DEVICE_NATIVE_ENDIAN,
3158 };
3159
3160 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3161 unsigned size)
3162 {
3163 ram_addr_t raddr = addr;
3164 void *ptr = qemu_get_ram_ptr(raddr);
3165 switch (size) {
3166 case 1: return ldub_p(ptr);
3167 case 2: return lduw_p(ptr);
3168 case 4: return ldl_p(ptr);
3169 default: abort();
3170 }
3171 }
3172
3173 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3174 uint64_t value, unsigned size)
3175 {
3176 ram_addr_t raddr = addr;
3177 void *ptr = qemu_get_ram_ptr(raddr);
3178 switch (size) {
3179 case 1: return stb_p(ptr, value);
3180 case 2: return stw_p(ptr, value);
3181 case 4: return stl_p(ptr, value);
3182 default: abort();
3183 }
3184 }
3185
3186 static const MemoryRegionOps subpage_ram_ops = {
3187 .read = subpage_ram_read,
3188 .write = subpage_ram_write,
3189 .endianness = DEVICE_NATIVE_ENDIAN,
3190 };
3191
3192 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3193 uint16_t section)
3194 {
3195 int idx, eidx;
3196
3197 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3198 return -1;
3199 idx = SUBPAGE_IDX(start);
3200 eidx = SUBPAGE_IDX(end);
3201 #if defined(DEBUG_SUBPAGE)
3202 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3203 mmio, start, end, idx, eidx, memory);
3204 #endif
3205 if (memory_region_is_ram(phys_sections[section].mr)) {
3206 MemoryRegionSection new_section = phys_sections[section];
3207 new_section.mr = &io_mem_subpage_ram;
3208 section = phys_section_add(&new_section);
3209 }
3210 for (; idx <= eidx; idx++) {
3211 mmio->sub_section[idx] = section;
3212 }
3213
3214 return 0;
3215 }
3216
3217 static subpage_t *subpage_init(target_phys_addr_t base)
3218 {
3219 subpage_t *mmio;
3220
3221 mmio = g_malloc0(sizeof(subpage_t));
3222
3223 mmio->base = base;
3224 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3225 "subpage", TARGET_PAGE_SIZE);
3226 mmio->iomem.subpage = true;
3227 #if defined(DEBUG_SUBPAGE)
3228 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3229 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3230 #endif
3231 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3232
3233 return mmio;
3234 }
3235
3236 static uint16_t dummy_section(MemoryRegion *mr)
3237 {
3238 MemoryRegionSection section = {
3239 .mr = mr,
3240 .offset_within_address_space = 0,
3241 .offset_within_region = 0,
3242 .size = UINT64_MAX,
3243 };
3244
3245 return phys_section_add(&section);
3246 }
3247
3248 MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3249 {
3250 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3251 }
3252
3253 static void io_mem_init(void)
3254 {
3255 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3256 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3257 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3258 "unassigned", UINT64_MAX);
3259 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3260 "notdirty", UINT64_MAX);
3261 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3262 "subpage-ram", UINT64_MAX);
3263 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3264 "watch", UINT64_MAX);
3265 }
3266
3267 static void core_begin(MemoryListener *listener)
3268 {
3269 destroy_all_mappings();
3270 phys_sections_clear();
3271 phys_map.ptr = PHYS_MAP_NODE_NIL;
3272 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3273 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3274 phys_section_rom = dummy_section(&io_mem_rom);
3275 phys_section_watch = dummy_section(&io_mem_watch);
3276 }
3277
3278 static void core_commit(MemoryListener *listener)
3279 {
3280 CPUArchState *env;
3281
3282 /* since each CPU stores ram addresses in its TLB cache, we must
3283 reset the modified entries */
3284 /* XXX: slow ! */
3285 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3286 tlb_flush(env, 1);
3287 }
3288 }
3289
3290 static void core_region_add(MemoryListener *listener,
3291 MemoryRegionSection *section)
3292 {
3293 cpu_register_physical_memory_log(section, section->readonly);
3294 }
3295
3296 static void core_region_del(MemoryListener *listener,
3297 MemoryRegionSection *section)
3298 {
3299 }
3300
3301 static void core_region_nop(MemoryListener *listener,
3302 MemoryRegionSection *section)
3303 {
3304 cpu_register_physical_memory_log(section, section->readonly);
3305 }
3306
3307 static void core_log_start(MemoryListener *listener,
3308 MemoryRegionSection *section)
3309 {
3310 }
3311
3312 static void core_log_stop(MemoryListener *listener,
3313 MemoryRegionSection *section)
3314 {
3315 }
3316
3317 static void core_log_sync(MemoryListener *listener,
3318 MemoryRegionSection *section)
3319 {
3320 }
3321
3322 static void core_log_global_start(MemoryListener *listener)
3323 {
3324 cpu_physical_memory_set_dirty_tracking(1);
3325 }
3326
3327 static void core_log_global_stop(MemoryListener *listener)
3328 {
3329 cpu_physical_memory_set_dirty_tracking(0);
3330 }
3331
3332 static void core_eventfd_add(MemoryListener *listener,
3333 MemoryRegionSection *section,
3334 bool match_data, uint64_t data, int fd)
3335 {
3336 }
3337
3338 static void core_eventfd_del(MemoryListener *listener,
3339 MemoryRegionSection *section,
3340 bool match_data, uint64_t data, int fd)
3341 {
3342 }
3343
3344 static void io_begin(MemoryListener *listener)
3345 {
3346 }
3347
3348 static void io_commit(MemoryListener *listener)
3349 {
3350 }
3351
3352 static void io_region_add(MemoryListener *listener,
3353 MemoryRegionSection *section)
3354 {
3355 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3356
3357 mrio->mr = section->mr;
3358 mrio->offset = section->offset_within_region;
3359 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3360 section->offset_within_address_space, section->size);
3361 ioport_register(&mrio->iorange);
3362 }
3363
3364 static void io_region_del(MemoryListener *listener,
3365 MemoryRegionSection *section)
3366 {
3367 isa_unassign_ioport(section->offset_within_address_space, section->size);
3368 }
3369
3370 static void io_region_nop(MemoryListener *listener,
3371 MemoryRegionSection *section)
3372 {
3373 }
3374
3375 static void io_log_start(MemoryListener *listener,
3376 MemoryRegionSection *section)
3377 {
3378 }
3379
3380 static void io_log_stop(MemoryListener *listener,
3381 MemoryRegionSection *section)
3382 {
3383 }
3384
3385 static void io_log_sync(MemoryListener *listener,
3386 MemoryRegionSection *section)
3387 {
3388 }
3389
3390 static void io_log_global_start(MemoryListener *listener)
3391 {
3392 }
3393
3394 static void io_log_global_stop(MemoryListener *listener)
3395 {
3396 }
3397
3398 static void io_eventfd_add(MemoryListener *listener,
3399 MemoryRegionSection *section,
3400 bool match_data, uint64_t data, int fd)
3401 {
3402 }
3403
3404 static void io_eventfd_del(MemoryListener *listener,
3405 MemoryRegionSection *section,
3406 bool match_data, uint64_t data, int fd)
3407 {
3408 }
3409
3410 static MemoryListener core_memory_listener = {
3411 .begin = core_begin,
3412 .commit = core_commit,
3413 .region_add = core_region_add,
3414 .region_del = core_region_del,
3415 .region_nop = core_region_nop,
3416 .log_start = core_log_start,
3417 .log_stop = core_log_stop,
3418 .log_sync = core_log_sync,
3419 .log_global_start = core_log_global_start,
3420 .log_global_stop = core_log_global_stop,
3421 .eventfd_add = core_eventfd_add,
3422 .eventfd_del = core_eventfd_del,
3423 .priority = 0,
3424 };
3425
3426 static MemoryListener io_memory_listener = {
3427 .begin = io_begin,
3428 .commit = io_commit,
3429 .region_add = io_region_add,
3430 .region_del = io_region_del,
3431 .region_nop = io_region_nop,
3432 .log_start = io_log_start,
3433 .log_stop = io_log_stop,
3434 .log_sync = io_log_sync,
3435 .log_global_start = io_log_global_start,
3436 .log_global_stop = io_log_global_stop,
3437 .eventfd_add = io_eventfd_add,
3438 .eventfd_del = io_eventfd_del,
3439 .priority = 0,
3440 };
3441
3442 static void memory_map_init(void)
3443 {
3444 system_memory = g_malloc(sizeof(*system_memory));
3445 memory_region_init(system_memory, "system", INT64_MAX);
3446 set_system_memory_map(system_memory);
3447
3448 system_io = g_malloc(sizeof(*system_io));
3449 memory_region_init(system_io, "io", 65536);
3450 set_system_io_map(system_io);
3451
3452 memory_listener_register(&core_memory_listener, system_memory);
3453 memory_listener_register(&io_memory_listener, system_io);
3454 }
3455
3456 MemoryRegion *get_system_memory(void)
3457 {
3458 return system_memory;
3459 }
3460
3461 MemoryRegion *get_system_io(void)
3462 {
3463 return system_io;
3464 }
3465
3466 #endif /* !defined(CONFIG_USER_ONLY) */
3467
3468 /* physical memory access (slow version, mainly for debug) */
3469 #if defined(CONFIG_USER_ONLY)
3470 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3471 uint8_t *buf, int len, int is_write)
3472 {
3473 int l, flags;
3474 target_ulong page;
3475 void * p;
3476
3477 while (len > 0) {
3478 page = addr & TARGET_PAGE_MASK;
3479 l = (page + TARGET_PAGE_SIZE) - addr;
3480 if (l > len)
3481 l = len;
3482 flags = page_get_flags(page);
3483 if (!(flags & PAGE_VALID))
3484 return -1;
3485 if (is_write) {
3486 if (!(flags & PAGE_WRITE))
3487 return -1;
3488 /* XXX: this code should not depend on lock_user */
3489 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3490 return -1;
3491 memcpy(p, buf, l);
3492 unlock_user(p, addr, l);
3493 } else {
3494 if (!(flags & PAGE_READ))
3495 return -1;
3496 /* XXX: this code should not depend on lock_user */
3497 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3498 return -1;
3499 memcpy(buf, p, l);
3500 unlock_user(p, addr, 0);
3501 }
3502 len -= l;
3503 buf += l;
3504 addr += l;
3505 }
3506 return 0;
3507 }
3508
3509 #else
3510 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3511 int len, int is_write)
3512 {
3513 int l;
3514 uint8_t *ptr;
3515 uint32_t val;
3516 target_phys_addr_t page;
3517 MemoryRegionSection *section;
3518
3519 while (len > 0) {
3520 page = addr & TARGET_PAGE_MASK;
3521 l = (page + TARGET_PAGE_SIZE) - addr;
3522 if (l > len)
3523 l = len;
3524 section = phys_page_find(page >> TARGET_PAGE_BITS);
3525
3526 if (is_write) {
3527 if (!memory_region_is_ram(section->mr)) {
3528 target_phys_addr_t addr1;
3529 addr1 = memory_region_section_addr(section, addr);
3530 /* XXX: could force cpu_single_env to NULL to avoid
3531 potential bugs */
3532 if (l >= 4 && ((addr1 & 3) == 0)) {
3533 /* 32 bit write access */
3534 val = ldl_p(buf);
3535 io_mem_write(section->mr, addr1, val, 4);
3536 l = 4;
3537 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3538 /* 16 bit write access */
3539 val = lduw_p(buf);
3540 io_mem_write(section->mr, addr1, val, 2);
3541 l = 2;
3542 } else {
3543 /* 8 bit write access */
3544 val = ldub_p(buf);
3545 io_mem_write(section->mr, addr1, val, 1);
3546 l = 1;
3547 }
3548 } else if (!section->readonly) {
3549 ram_addr_t addr1;
3550 addr1 = memory_region_get_ram_addr(section->mr)
3551 + memory_region_section_addr(section, addr);
3552 /* RAM case */
3553 ptr = qemu_get_ram_ptr(addr1);
3554 memcpy(ptr, buf, l);
3555 if (!cpu_physical_memory_is_dirty(addr1)) {
3556 /* invalidate code */
3557 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3558 /* set dirty bit */
3559 cpu_physical_memory_set_dirty_flags(
3560 addr1, (0xff & ~CODE_DIRTY_FLAG));
3561 }
3562 qemu_put_ram_ptr(ptr);
3563 }
3564 } else {
3565 if (!(memory_region_is_ram(section->mr) ||
3566 memory_region_is_romd(section->mr))) {
3567 target_phys_addr_t addr1;
3568 /* I/O case */
3569 addr1 = memory_region_section_addr(section, addr);
3570 if (l >= 4 && ((addr1 & 3) == 0)) {
3571 /* 32 bit read access */
3572 val = io_mem_read(section->mr, addr1, 4);
3573 stl_p(buf, val);
3574 l = 4;
3575 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3576 /* 16 bit read access */
3577 val = io_mem_read(section->mr, addr1, 2);
3578 stw_p(buf, val);
3579 l = 2;
3580 } else {
3581 /* 8 bit read access */
3582 val = io_mem_read(section->mr, addr1, 1);
3583 stb_p(buf, val);
3584 l = 1;
3585 }
3586 } else {
3587 /* RAM case */
3588 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3589 + memory_region_section_addr(section,
3590 addr));
3591 memcpy(buf, ptr, l);
3592 qemu_put_ram_ptr(ptr);
3593 }
3594 }
3595 len -= l;
3596 buf += l;
3597 addr += l;
3598 }
3599 }
3600
3601 /* used for ROM loading : can write in RAM and ROM */
3602 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3603 const uint8_t *buf, int len)
3604 {
3605 int l;
3606 uint8_t *ptr;
3607 target_phys_addr_t page;
3608 MemoryRegionSection *section;
3609
3610 while (len > 0) {
3611 page = addr & TARGET_PAGE_MASK;
3612 l = (page + TARGET_PAGE_SIZE) - addr;
3613 if (l > len)
3614 l = len;
3615 section = phys_page_find(page >> TARGET_PAGE_BITS);
3616
3617 if (!(memory_region_is_ram(section->mr) ||
3618 memory_region_is_romd(section->mr))) {
3619 /* do nothing */
3620 } else {
3621 unsigned long addr1;
3622 addr1 = memory_region_get_ram_addr(section->mr)
3623 + memory_region_section_addr(section, addr);
3624 /* ROM/RAM case */
3625 ptr = qemu_get_ram_ptr(addr1);
3626 memcpy(ptr, buf, l);
3627 qemu_put_ram_ptr(ptr);
3628 }
3629 len -= l;
3630 buf += l;
3631 addr += l;
3632 }
3633 }
3634
3635 typedef struct {
3636 void *buffer;
3637 target_phys_addr_t addr;
3638 target_phys_addr_t len;
3639 } BounceBuffer;
3640
3641 static BounceBuffer bounce;
3642
3643 typedef struct MapClient {
3644 void *opaque;
3645 void (*callback)(void *opaque);
3646 QLIST_ENTRY(MapClient) link;
3647 } MapClient;
3648
3649 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3650 = QLIST_HEAD_INITIALIZER(map_client_list);
3651
3652 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3653 {
3654 MapClient *client = g_malloc(sizeof(*client));
3655
3656 client->opaque = opaque;
3657 client->callback = callback;
3658 QLIST_INSERT_HEAD(&map_client_list, client, link);
3659 return client;
3660 }
3661
3662 void cpu_unregister_map_client(void *_client)
3663 {
3664 MapClient *client = (MapClient *)_client;
3665
3666 QLIST_REMOVE(client, link);
3667 g_free(client);
3668 }
3669
3670 static void cpu_notify_map_clients(void)
3671 {
3672 MapClient *client;
3673
3674 while (!QLIST_EMPTY(&map_client_list)) {
3675 client = QLIST_FIRST(&map_client_list);
3676 client->callback(client->opaque);
3677 cpu_unregister_map_client(client);
3678 }
3679 }
3680
3681 /* Map a physical memory region into a host virtual address.
3682 * May map a subset of the requested range, given by and returned in *plen.
3683 * May return NULL if resources needed to perform the mapping are exhausted.
3684 * Use only for reads OR writes - not for read-modify-write operations.
3685 * Use cpu_register_map_client() to know when retrying the map operation is
3686 * likely to succeed.
3687 */
3688 void *cpu_physical_memory_map(target_phys_addr_t addr,
3689 target_phys_addr_t *plen,
3690 int is_write)
3691 {
3692 target_phys_addr_t len = *plen;
3693 target_phys_addr_t todo = 0;
3694 int l;
3695 target_phys_addr_t page;
3696 MemoryRegionSection *section;
3697 ram_addr_t raddr = RAM_ADDR_MAX;
3698 ram_addr_t rlen;
3699 void *ret;
3700
3701 while (len > 0) {
3702 page = addr & TARGET_PAGE_MASK;
3703 l = (page + TARGET_PAGE_SIZE) - addr;
3704 if (l > len)
3705 l = len;
3706 section = phys_page_find(page >> TARGET_PAGE_BITS);
3707
3708 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3709 if (todo || bounce.buffer) {
3710 break;
3711 }
3712 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3713 bounce.addr = addr;
3714 bounce.len = l;
3715 if (!is_write) {
3716 cpu_physical_memory_read(addr, bounce.buffer, l);
3717 }
3718
3719 *plen = l;
3720 return bounce.buffer;
3721 }
3722 if (!todo) {
3723 raddr = memory_region_get_ram_addr(section->mr)
3724 + memory_region_section_addr(section, addr);
3725 }
3726
3727 len -= l;
3728 addr += l;
3729 todo += l;
3730 }
3731 rlen = todo;
3732 ret = qemu_ram_ptr_length(raddr, &rlen);
3733 *plen = rlen;
3734 return ret;
3735 }
3736
3737 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3738 * Will also mark the memory as dirty if is_write == 1. access_len gives
3739 * the amount of memory that was actually read or written by the caller.
3740 */
3741 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3742 int is_write, target_phys_addr_t access_len)
3743 {
3744 if (buffer != bounce.buffer) {
3745 if (is_write) {
3746 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3747 while (access_len) {
3748 unsigned l;
3749 l = TARGET_PAGE_SIZE;
3750 if (l > access_len)
3751 l = access_len;
3752 if (!cpu_physical_memory_is_dirty(addr1)) {
3753 /* invalidate code */
3754 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3755 /* set dirty bit */
3756 cpu_physical_memory_set_dirty_flags(
3757 addr1, (0xff & ~CODE_DIRTY_FLAG));
3758 }
3759 addr1 += l;
3760 access_len -= l;
3761 }
3762 }
3763 if (xen_enabled()) {
3764 xen_invalidate_map_cache_entry(buffer);
3765 }
3766 return;
3767 }
3768 if (is_write) {
3769 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3770 }
3771 qemu_vfree(bounce.buffer);
3772 bounce.buffer = NULL;
3773 cpu_notify_map_clients();
3774 }
3775
3776 /* warning: addr must be aligned */
3777 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
3778 enum device_endian endian)
3779 {
3780 uint8_t *ptr;
3781 uint32_t val;
3782 MemoryRegionSection *section;
3783
3784 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3785
3786 if (!(memory_region_is_ram(section->mr) ||
3787 memory_region_is_romd(section->mr))) {
3788 /* I/O case */
3789 addr = memory_region_section_addr(section, addr);
3790 val = io_mem_read(section->mr, addr, 4);
3791 #if defined(TARGET_WORDS_BIGENDIAN)
3792 if (endian == DEVICE_LITTLE_ENDIAN) {
3793 val = bswap32(val);
3794 }
3795 #else
3796 if (endian == DEVICE_BIG_ENDIAN) {
3797 val = bswap32(val);
3798 }
3799 #endif
3800 } else {
3801 /* RAM case */
3802 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3803 & TARGET_PAGE_MASK)
3804 + memory_region_section_addr(section, addr));
3805 switch (endian) {
3806 case DEVICE_LITTLE_ENDIAN:
3807 val = ldl_le_p(ptr);
3808 break;
3809 case DEVICE_BIG_ENDIAN:
3810 val = ldl_be_p(ptr);
3811 break;
3812 default:
3813 val = ldl_p(ptr);
3814 break;
3815 }
3816 }
3817 return val;
3818 }
3819
3820 uint32_t ldl_phys(target_phys_addr_t addr)
3821 {
3822 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3823 }
3824
3825 uint32_t ldl_le_phys(target_phys_addr_t addr)
3826 {
3827 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3828 }
3829
3830 uint32_t ldl_be_phys(target_phys_addr_t addr)
3831 {
3832 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3833 }
3834
3835 /* warning: addr must be aligned */
3836 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
3837 enum device_endian endian)
3838 {
3839 uint8_t *ptr;
3840 uint64_t val;
3841 MemoryRegionSection *section;
3842
3843 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3844
3845 if (!(memory_region_is_ram(section->mr) ||
3846 memory_region_is_romd(section->mr))) {
3847 /* I/O case */
3848 addr = memory_region_section_addr(section, addr);
3849
3850 /* XXX This is broken when device endian != cpu endian.
3851 Fix and add "endian" variable check */
3852 #ifdef TARGET_WORDS_BIGENDIAN
3853 val = io_mem_read(section->mr, addr, 4) << 32;
3854 val |= io_mem_read(section->mr, addr + 4, 4);
3855 #else
3856 val = io_mem_read(section->mr, addr, 4);
3857 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3858 #endif
3859 } else {
3860 /* RAM case */
3861 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3862 & TARGET_PAGE_MASK)
3863 + memory_region_section_addr(section, addr));
3864 switch (endian) {
3865 case DEVICE_LITTLE_ENDIAN:
3866 val = ldq_le_p(ptr);
3867 break;
3868 case DEVICE_BIG_ENDIAN:
3869 val = ldq_be_p(ptr);
3870 break;
3871 default:
3872 val = ldq_p(ptr);
3873 break;
3874 }
3875 }
3876 return val;
3877 }
3878
3879 uint64_t ldq_phys(target_phys_addr_t addr)
3880 {
3881 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3882 }
3883
3884 uint64_t ldq_le_phys(target_phys_addr_t addr)
3885 {
3886 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3887 }
3888
3889 uint64_t ldq_be_phys(target_phys_addr_t addr)
3890 {
3891 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3892 }
3893
3894 /* XXX: optimize */
3895 uint32_t ldub_phys(target_phys_addr_t addr)
3896 {
3897 uint8_t val;
3898 cpu_physical_memory_read(addr, &val, 1);
3899 return val;
3900 }
3901
3902 /* warning: addr must be aligned */
3903 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
3904 enum device_endian endian)
3905 {
3906 uint8_t *ptr;
3907 uint64_t val;
3908 MemoryRegionSection *section;
3909
3910 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3911
3912 if (!(memory_region_is_ram(section->mr) ||
3913 memory_region_is_romd(section->mr))) {
3914 /* I/O case */
3915 addr = memory_region_section_addr(section, addr);
3916 val = io_mem_read(section->mr, addr, 2);
3917 #if defined(TARGET_WORDS_BIGENDIAN)
3918 if (endian == DEVICE_LITTLE_ENDIAN) {
3919 val = bswap16(val);
3920 }
3921 #else
3922 if (endian == DEVICE_BIG_ENDIAN) {
3923 val = bswap16(val);
3924 }
3925 #endif
3926 } else {
3927 /* RAM case */
3928 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3929 & TARGET_PAGE_MASK)
3930 + memory_region_section_addr(section, addr));
3931 switch (endian) {
3932 case DEVICE_LITTLE_ENDIAN:
3933 val = lduw_le_p(ptr);
3934 break;
3935 case DEVICE_BIG_ENDIAN:
3936 val = lduw_be_p(ptr);
3937 break;
3938 default:
3939 val = lduw_p(ptr);
3940 break;
3941 }
3942 }
3943 return val;
3944 }
3945
3946 uint32_t lduw_phys(target_phys_addr_t addr)
3947 {
3948 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3949 }
3950
3951 uint32_t lduw_le_phys(target_phys_addr_t addr)
3952 {
3953 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3954 }
3955
3956 uint32_t lduw_be_phys(target_phys_addr_t addr)
3957 {
3958 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3959 }
3960
3961 /* warning: addr must be aligned. The ram page is not masked as dirty
3962 and the code inside is not invalidated. It is useful if the dirty
3963 bits are used to track modified PTEs */
3964 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3965 {
3966 uint8_t *ptr;
3967 MemoryRegionSection *section;
3968
3969 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3970
3971 if (!memory_region_is_ram(section->mr) || section->readonly) {
3972 addr = memory_region_section_addr(section, addr);
3973 if (memory_region_is_ram(section->mr)) {
3974 section = &phys_sections[phys_section_rom];
3975 }
3976 io_mem_write(section->mr, addr, val, 4);
3977 } else {
3978 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3979 & TARGET_PAGE_MASK)
3980 + memory_region_section_addr(section, addr);
3981 ptr = qemu_get_ram_ptr(addr1);
3982 stl_p(ptr, val);
3983
3984 if (unlikely(in_migration)) {
3985 if (!cpu_physical_memory_is_dirty(addr1)) {
3986 /* invalidate code */
3987 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3988 /* set dirty bit */
3989 cpu_physical_memory_set_dirty_flags(
3990 addr1, (0xff & ~CODE_DIRTY_FLAG));
3991 }
3992 }
3993 }
3994 }
3995
3996 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
3997 {
3998 uint8_t *ptr;
3999 MemoryRegionSection *section;
4000
4001 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4002
4003 if (!memory_region_is_ram(section->mr) || section->readonly) {
4004 addr = memory_region_section_addr(section, addr);
4005 if (memory_region_is_ram(section->mr)) {
4006 section = &phys_sections[phys_section_rom];
4007 }
4008 #ifdef TARGET_WORDS_BIGENDIAN
4009 io_mem_write(section->mr, addr, val >> 32, 4);
4010 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
4011 #else
4012 io_mem_write(section->mr, addr, (uint32_t)val, 4);
4013 io_mem_write(section->mr, addr + 4, val >> 32, 4);
4014 #endif
4015 } else {
4016 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
4017 & TARGET_PAGE_MASK)
4018 + memory_region_section_addr(section, addr));
4019 stq_p(ptr, val);
4020 }
4021 }
4022
4023 /* warning: addr must be aligned */
4024 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4025 enum device_endian endian)
4026 {
4027 uint8_t *ptr;
4028 MemoryRegionSection *section;
4029
4030 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4031
4032 if (!memory_region_is_ram(section->mr) || section->readonly) {
4033 addr = memory_region_section_addr(section, addr);
4034 if (memory_region_is_ram(section->mr)) {
4035 section = &phys_sections[phys_section_rom];
4036 }
4037 #if defined(TARGET_WORDS_BIGENDIAN)
4038 if (endian == DEVICE_LITTLE_ENDIAN) {
4039 val = bswap32(val);
4040 }
4041 #else
4042 if (endian == DEVICE_BIG_ENDIAN) {
4043 val = bswap32(val);
4044 }
4045 #endif
4046 io_mem_write(section->mr, addr, val, 4);
4047 } else {
4048 unsigned long addr1;
4049 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4050 + memory_region_section_addr(section, addr);
4051 /* RAM case */
4052 ptr = qemu_get_ram_ptr(addr1);
4053 switch (endian) {
4054 case DEVICE_LITTLE_ENDIAN:
4055 stl_le_p(ptr, val);
4056 break;
4057 case DEVICE_BIG_ENDIAN:
4058 stl_be_p(ptr, val);
4059 break;
4060 default:
4061 stl_p(ptr, val);
4062 break;
4063 }
4064 if (!cpu_physical_memory_is_dirty(addr1)) {
4065 /* invalidate code */
4066 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4067 /* set dirty bit */
4068 cpu_physical_memory_set_dirty_flags(addr1,
4069 (0xff & ~CODE_DIRTY_FLAG));
4070 }
4071 }
4072 }
4073
4074 void stl_phys(target_phys_addr_t addr, uint32_t val)
4075 {
4076 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4077 }
4078
4079 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4080 {
4081 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4082 }
4083
4084 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4085 {
4086 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4087 }
4088
4089 /* XXX: optimize */
4090 void stb_phys(target_phys_addr_t addr, uint32_t val)
4091 {
4092 uint8_t v = val;
4093 cpu_physical_memory_write(addr, &v, 1);
4094 }
4095
4096 /* warning: addr must be aligned */
4097 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4098 enum device_endian endian)
4099 {
4100 uint8_t *ptr;
4101 MemoryRegionSection *section;
4102
4103 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4104
4105 if (!memory_region_is_ram(section->mr) || section->readonly) {
4106 addr = memory_region_section_addr(section, addr);
4107 if (memory_region_is_ram(section->mr)) {
4108 section = &phys_sections[phys_section_rom];
4109 }
4110 #if defined(TARGET_WORDS_BIGENDIAN)
4111 if (endian == DEVICE_LITTLE_ENDIAN) {
4112 val = bswap16(val);
4113 }
4114 #else
4115 if (endian == DEVICE_BIG_ENDIAN) {
4116 val = bswap16(val);
4117 }
4118 #endif
4119 io_mem_write(section->mr, addr, val, 2);
4120 } else {
4121 unsigned long addr1;
4122 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4123 + memory_region_section_addr(section, addr);
4124 /* RAM case */
4125 ptr = qemu_get_ram_ptr(addr1);
4126 switch (endian) {
4127 case DEVICE_LITTLE_ENDIAN:
4128 stw_le_p(ptr, val);
4129 break;
4130 case DEVICE_BIG_ENDIAN:
4131 stw_be_p(ptr, val);
4132 break;
4133 default:
4134 stw_p(ptr, val);
4135 break;
4136 }
4137 if (!cpu_physical_memory_is_dirty(addr1)) {
4138 /* invalidate code */
4139 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4140 /* set dirty bit */
4141 cpu_physical_memory_set_dirty_flags(addr1,
4142 (0xff & ~CODE_DIRTY_FLAG));
4143 }
4144 }
4145 }
4146
4147 void stw_phys(target_phys_addr_t addr, uint32_t val)
4148 {
4149 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4150 }
4151
4152 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4153 {
4154 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4155 }
4156
4157 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4158 {
4159 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4160 }
4161
4162 /* XXX: optimize */
4163 void stq_phys(target_phys_addr_t addr, uint64_t val)
4164 {
4165 val = tswap64(val);
4166 cpu_physical_memory_write(addr, &val, 8);
4167 }
4168
4169 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4170 {
4171 val = cpu_to_le64(val);
4172 cpu_physical_memory_write(addr, &val, 8);
4173 }
4174
4175 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4176 {
4177 val = cpu_to_be64(val);
4178 cpu_physical_memory_write(addr, &val, 8);
4179 }
4180
4181 /* virtual memory access for debug (includes writing to ROM) */
4182 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4183 uint8_t *buf, int len, int is_write)
4184 {
4185 int l;
4186 target_phys_addr_t phys_addr;
4187 target_ulong page;
4188
4189 while (len > 0) {
4190 page = addr & TARGET_PAGE_MASK;
4191 phys_addr = cpu_get_phys_page_debug(env, page);
4192 /* if no physical page mapped, return an error */
4193 if (phys_addr == -1)
4194 return -1;
4195 l = (page + TARGET_PAGE_SIZE) - addr;
4196 if (l > len)
4197 l = len;
4198 phys_addr += (addr & ~TARGET_PAGE_MASK);
4199 if (is_write)
4200 cpu_physical_memory_write_rom(phys_addr, buf, l);
4201 else
4202 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4203 len -= l;
4204 buf += l;
4205 addr += l;
4206 }
4207 return 0;
4208 }
4209 #endif
4210
4211 /* in deterministic execution mode, instructions doing device I/Os
4212 must be at the end of the TB */
4213 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4214 {
4215 TranslationBlock *tb;
4216 uint32_t n, cflags;
4217 target_ulong pc, cs_base;
4218 uint64_t flags;
4219
4220 tb = tb_find_pc(retaddr);
4221 if (!tb) {
4222 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4223 (void *)retaddr);
4224 }
4225 n = env->icount_decr.u16.low + tb->icount;
4226 cpu_restore_state(tb, env, retaddr);
4227 /* Calculate how many instructions had been executed before the fault
4228 occurred. */
4229 n = n - env->icount_decr.u16.low;
4230 /* Generate a new TB ending on the I/O insn. */
4231 n++;
4232 /* On MIPS and SH, delay slot instructions can only be restarted if
4233 they were already the first instruction in the TB. If this is not
4234 the first instruction in a TB then re-execute the preceding
4235 branch. */
4236 #if defined(TARGET_MIPS)
4237 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4238 env->active_tc.PC -= 4;
4239 env->icount_decr.u16.low++;
4240 env->hflags &= ~MIPS_HFLAG_BMASK;
4241 }
4242 #elif defined(TARGET_SH4)
4243 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4244 && n > 1) {
4245 env->pc -= 2;
4246 env->icount_decr.u16.low++;
4247 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4248 }
4249 #endif
4250 /* This should never happen. */
4251 if (n > CF_COUNT_MASK)
4252 cpu_abort(env, "TB too big during recompile");
4253
4254 cflags = n | CF_LAST_IO;
4255 pc = tb->pc;
4256 cs_base = tb->cs_base;
4257 flags = tb->flags;
4258 tb_phys_invalidate(tb, -1);
4259 /* FIXME: In theory this could raise an exception. In practice
4260 we have already translated the block once so it's probably ok. */
4261 tb_gen_code(env, pc, cs_base, flags, cflags);
4262 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4263 the first in the TB) then we end up generating a whole new TB and
4264 repeating the fault, which is horribly inefficient.
4265 Better would be to execute just this insn uncached, or generate a
4266 second new TB. */
4267 cpu_resume_from_signal(env, NULL);
4268 }
4269
4270 #if !defined(CONFIG_USER_ONLY)
4271
4272 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4273 {
4274 int i, target_code_size, max_target_code_size;
4275 int direct_jmp_count, direct_jmp2_count, cross_page;
4276 TranslationBlock *tb;
4277
4278 target_code_size = 0;
4279 max_target_code_size = 0;
4280 cross_page = 0;
4281 direct_jmp_count = 0;
4282 direct_jmp2_count = 0;
4283 for(i = 0; i < nb_tbs; i++) {
4284 tb = &tbs[i];
4285 target_code_size += tb->size;
4286 if (tb->size > max_target_code_size)
4287 max_target_code_size = tb->size;
4288 if (tb->page_addr[1] != -1)
4289 cross_page++;
4290 if (tb->tb_next_offset[0] != 0xffff) {
4291 direct_jmp_count++;
4292 if (tb->tb_next_offset[1] != 0xffff) {
4293 direct_jmp2_count++;
4294 }
4295 }
4296 }
4297 /* XXX: avoid using doubles ? */
4298 cpu_fprintf(f, "Translation buffer state:\n");
4299 cpu_fprintf(f, "gen code size %td/%ld\n",
4300 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4301 cpu_fprintf(f, "TB count %d/%d\n",
4302 nb_tbs, code_gen_max_blocks);
4303 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4304 nb_tbs ? target_code_size / nb_tbs : 0,
4305 max_target_code_size);
4306 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4307 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4308 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4309 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4310 cross_page,
4311 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4312 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4313 direct_jmp_count,
4314 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4315 direct_jmp2_count,
4316 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4317 cpu_fprintf(f, "\nStatistics:\n");
4318 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4319 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4320 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4321 tcg_dump_info(f, cpu_fprintf);
4322 }
4323
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine.  Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
    bool big_endian = false;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#endif
    return big_endian;
}
4337
4338 #endif
4339
4340 #ifndef CONFIG_USER_ONLY
4341 bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
4342 {
4343 MemoryRegionSection *section;
4344
4345 section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);
4346
4347 return !(memory_region_is_ram(section->mr) ||
4348 memory_region_is_romd(section->mr));
4349 }
4350 #endif