1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
59
60 #include "cputlb.h"
61
62 #define WANT_EXEC_OBSOLETE
63 #include "exec-obsolete.h"
64
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_UNASSIGNED
68
69 /* make various TB consistency checks */
70 //#define DEBUG_TB_CHECK
71
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
74
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
79
80 #define SMC_BITMAP_USE_THRESHOLD 10
81
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88
89 #if defined(__arm__) || defined(__sparc_v9__)
90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
91    have limited branch ranges (possibly also PPC), so place it in a
92    section close to the code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32) && !defined(_WIN64)
97 #define code_gen_section \
98 __attribute__((aligned (16)))
99 #else
100 #define code_gen_section \
101 __attribute__((aligned (32)))
102 #endif
103
104 uint8_t code_gen_prologue[1024] code_gen_section;
105 static uint8_t *code_gen_buffer;
106 static unsigned long code_gen_buffer_size;
107 /* threshold to flush the translated code buffer */
108 static unsigned long code_gen_buffer_max_size;
109 static uint8_t *code_gen_ptr;
110
111 #if !defined(CONFIG_USER_ONLY)
112 int phys_ram_fd;
113 static int in_migration;
114
115 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
116
117 static MemoryRegion *system_memory;
118 static MemoryRegion *system_io;
119
120 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
121 static MemoryRegion io_mem_subpage_ram;
122
123 #endif
124
125 CPUArchState *first_cpu;
126 /* current CPU in the current thread. It is only valid inside
127 cpu_exec() */
128 DEFINE_TLS(CPUArchState *,cpu_single_env);
129 /* 0 = Do not count executed instructions.
130 1 = Precise instruction counting.
131 2 = Adaptive rate instruction counting. */
132 int use_icount = 0;
133
134 typedef struct PageDesc {
135 /* list of TBs intersecting this ram page */
136 TranslationBlock *first_tb;
137     /* in order to optimize self-modifying code, we count the number
138        of write accesses to a given page; past a threshold a bitmap is used */
139 unsigned int code_write_count;
140 uint8_t *code_bitmap;
141 #if defined(CONFIG_USER_ONLY)
142 unsigned long flags;
143 #endif
144 } PageDesc;
145
146 /* In system mode we want L1_MAP to be based on ram offsets,
147 while in user mode we want it to be based on virtual addresses. */
148 #if !defined(CONFIG_USER_ONLY)
149 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
150 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
151 #else
152 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
153 #endif
154 #else
155 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
156 #endif
157
158 /* Size of the L2 (and L3, etc) page tables. */
159 #define L2_BITS 10
160 #define L2_SIZE (1 << L2_BITS)
161
162 #define P_L2_LEVELS \
163 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
164
165 /* The bits remaining after N lower levels of page tables. */
166 #define V_L1_BITS_REM \
167 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
168
169 #if V_L1_BITS_REM < 4
170 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
171 #else
172 #define V_L1_BITS V_L1_BITS_REM
173 #endif
174
175 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
176
177 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
178
179 uintptr_t qemu_real_host_page_size;
180 uintptr_t qemu_host_page_size;
181 uintptr_t qemu_host_page_mask;
182
183 /* This is a multi-level map on the virtual address space.
184 The bottom level has pointers to PageDesc. */
185 static void *l1_map[V_L1_SIZE];
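/* Descriptive note on the decoding done by the macros above (example values
   are an assumption, not fixed by this file): a target page index is split
   into one V_L1_BITS-wide index into l1_map followed by V_L1_SHIFT / L2_BITS
   further levels of L2_BITS each.  For example, assuming an
   L1_MAP_ADDR_SPACE_BITS of 64 and 12-bit target pages, the remaining 52 bits
   split into V_L1_BITS = 12 (since 52 % 10 = 2 < 4) plus four 10-bit levels,
   i.e. a 4096-entry l1_map over three levels of pointer tables and a final
   array of PageDescs. */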
186
187 #if !defined(CONFIG_USER_ONLY)
188 typedef struct PhysPageEntry PhysPageEntry;
189
190 static MemoryRegionSection *phys_sections;
191 static unsigned phys_sections_nb, phys_sections_nb_alloc;
192 static uint16_t phys_section_unassigned;
193 static uint16_t phys_section_notdirty;
194 static uint16_t phys_section_rom;
195 static uint16_t phys_section_watch;
196
197 struct PhysPageEntry {
198 uint16_t is_leaf : 1;
199 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
200 uint16_t ptr : 15;
201 };
202
203 /* Simple allocator for PhysPageEntry nodes */
204 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
205 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
206
207 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
208
209 /* This is a multi-level map on the physical address space.
210 The bottom level has pointers to MemoryRegionSections. */
211 static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
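/* Descriptive note: phys_map is the root of a radix tree with P_L2_LEVELS
   levels.  An entry with is_leaf set indexes phys_sections directly, while a
   non-leaf entry indexes phys_map_nodes; ptr is only 15 bits wide, so
   PHYS_MAP_NODE_NIL (0x7fff) marks a child that has not been allocated yet
   and resolves to phys_section_unassigned on lookup. */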
212
213 static void io_mem_init(void);
214 static void memory_map_init(void);
215
216 static MemoryRegion io_mem_watch;
217 #endif
218
219 /* log support */
220 #ifdef WIN32
221 static const char *logfilename = "qemu.log";
222 #else
223 static const char *logfilename = "/tmp/qemu.log";
224 #endif
225 FILE *logfile;
226 int loglevel;
227 static int log_append = 0;
228
229 /* statistics */
230 static int tb_flush_count;
231 static int tb_phys_invalidate_count;
232
233 #ifdef _WIN32
234 static void map_exec(void *addr, long size)
235 {
236 DWORD old_protect;
237 VirtualProtect(addr, size,
238 PAGE_EXECUTE_READWRITE, &old_protect);
239
240 }
241 #else
242 static void map_exec(void *addr, long size)
243 {
244 unsigned long start, end, page_size;
245
246 page_size = getpagesize();
247 start = (unsigned long)addr;
248 start &= ~(page_size - 1);
249
250 end = (unsigned long)addr + size;
251 end += page_size - 1;
252 end &= ~(page_size - 1);
253
254 mprotect((void *)start, end - start,
255 PROT_READ | PROT_WRITE | PROT_EXEC);
256 }
257 #endif
258
259 static void page_init(void)
260 {
261 /* NOTE: we can always suppose that qemu_host_page_size >=
262 TARGET_PAGE_SIZE */
263 #ifdef _WIN32
264 {
265 SYSTEM_INFO system_info;
266
267 GetSystemInfo(&system_info);
268 qemu_real_host_page_size = system_info.dwPageSize;
269 }
270 #else
271 qemu_real_host_page_size = getpagesize();
272 #endif
273 if (qemu_host_page_size == 0)
274 qemu_host_page_size = qemu_real_host_page_size;
275 if (qemu_host_page_size < TARGET_PAGE_SIZE)
276 qemu_host_page_size = TARGET_PAGE_SIZE;
277 qemu_host_page_mask = ~(qemu_host_page_size - 1);
278
279 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
280 {
281 #ifdef HAVE_KINFO_GETVMMAP
282 struct kinfo_vmentry *freep;
283 int i, cnt;
284
285 freep = kinfo_getvmmap(getpid(), &cnt);
286 if (freep) {
287 mmap_lock();
288 for (i = 0; i < cnt; i++) {
289 unsigned long startaddr, endaddr;
290
291 startaddr = freep[i].kve_start;
292 endaddr = freep[i].kve_end;
293 if (h2g_valid(startaddr)) {
294 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
295
296 if (h2g_valid(endaddr)) {
297 endaddr = h2g(endaddr);
298 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
299 } else {
300 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
301 endaddr = ~0ul;
302 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
303 #endif
304 }
305 }
306 }
307 free(freep);
308 mmap_unlock();
309 }
310 #else
311 FILE *f;
312
313 last_brk = (unsigned long)sbrk(0);
314
315 f = fopen("/compat/linux/proc/self/maps", "r");
316 if (f) {
317 mmap_lock();
318
319 do {
320 unsigned long startaddr, endaddr;
321 int n;
322
323 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
324
325 if (n == 2 && h2g_valid(startaddr)) {
326 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
327
328 if (h2g_valid(endaddr)) {
329 endaddr = h2g(endaddr);
330 } else {
331 endaddr = ~0ul;
332 }
333 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
334 }
335 } while (!feof(f));
336
337 fclose(f);
338 mmap_unlock();
339 }
340 #endif
341 }
342 #endif
343 }
344
345 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
346 {
347 PageDesc *pd;
348 void **lp;
349 int i;
350
351 #if defined(CONFIG_USER_ONLY)
352 /* We can't use g_malloc because it may recurse into a locked mutex. */
353 # define ALLOC(P, SIZE) \
354 do { \
355 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
356 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
357 } while (0)
358 #else
359 # define ALLOC(P, SIZE) \
360 do { P = g_malloc0(SIZE); } while (0)
361 #endif
362
363 /* Level 1. Always allocated. */
364 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
365
366 /* Level 2..N-1. */
367 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
368 void **p = *lp;
369
370 if (p == NULL) {
371 if (!alloc) {
372 return NULL;
373 }
374 ALLOC(p, sizeof(void *) * L2_SIZE);
375 *lp = p;
376 }
377
378 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
379 }
380
381 pd = *lp;
382 if (pd == NULL) {
383 if (!alloc) {
384 return NULL;
385 }
386 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
387 *lp = pd;
388 }
389
390 #undef ALLOC
391
392 return pd + (index & (L2_SIZE - 1));
393 }
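/* Descriptive note: 'index' is a target page number.  The top V_L1_BITS bits
   select the l1_map slot and each further level consumes L2_BITS bits, ending
   in an array of L2_SIZE PageDesc entries.  With alloc == 0 the walk returns
   NULL as soon as a level is missing, which is what page_find() relies on. */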
394
395 static inline PageDesc *page_find(tb_page_addr_t index)
396 {
397 return page_find_alloc(index, 0);
398 }
399
400 #if !defined(CONFIG_USER_ONLY)
401
402 static void phys_map_node_reserve(unsigned nodes)
403 {
404 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
405 typedef PhysPageEntry Node[L2_SIZE];
406 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
407 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
408 phys_map_nodes_nb + nodes);
409 phys_map_nodes = g_renew(Node, phys_map_nodes,
410 phys_map_nodes_nb_alloc);
411 }
412 }
413
414 static uint16_t phys_map_node_alloc(void)
415 {
416 unsigned i;
417 uint16_t ret;
418
419 ret = phys_map_nodes_nb++;
420 assert(ret != PHYS_MAP_NODE_NIL);
421 assert(ret != phys_map_nodes_nb_alloc);
422 for (i = 0; i < L2_SIZE; ++i) {
423 phys_map_nodes[ret][i].is_leaf = 0;
424 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
425 }
426 return ret;
427 }
428
429 static void phys_map_nodes_reset(void)
430 {
431 phys_map_nodes_nb = 0;
432 }
433
434
435 static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
436 target_phys_addr_t *nb, uint16_t leaf,
437 int level)
438 {
439 PhysPageEntry *p;
440 int i;
441 target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
442
443 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
444 lp->ptr = phys_map_node_alloc();
445 p = phys_map_nodes[lp->ptr];
446 if (level == 0) {
447 for (i = 0; i < L2_SIZE; i++) {
448 p[i].is_leaf = 1;
449 p[i].ptr = phys_section_unassigned;
450 }
451 }
452 } else {
453 p = phys_map_nodes[lp->ptr];
454 }
455 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
456
457 while (*nb && lp < &p[L2_SIZE]) {
458 if ((*index & (step - 1)) == 0 && *nb >= step) {
459 lp->is_leaf = true;
460 lp->ptr = leaf;
461 *index += step;
462 *nb -= step;
463 } else {
464 phys_page_set_level(lp, index, nb, leaf, level - 1);
465 }
466 ++lp;
467 }
468 }
469
470 static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
471 uint16_t leaf)
472 {
473 /* Wildly overreserve - it doesn't matter much. */
474 phys_map_node_reserve(3 * P_L2_LEVELS);
475
476 phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
477 }
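/* Descriptive note: phys_page_set() maps 'nb' consecutive target pages
   starting at page 'index' to phys_sections[leaf].  phys_page_set_level()
   records a whole aligned, step-sized run as a single entry at an
   intermediate level, so large mappings stay compact instead of being
   expanded down to individual pages. */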
478
479 MemoryRegionSection *phys_page_find(target_phys_addr_t index)
480 {
481 PhysPageEntry lp = phys_map;
482 PhysPageEntry *p;
483 int i;
484 uint16_t s_index = phys_section_unassigned;
485
486 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
487 if (lp.ptr == PHYS_MAP_NODE_NIL) {
488 goto not_found;
489 }
490 p = phys_map_nodes[lp.ptr];
491 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
492 }
493
494 s_index = lp.ptr;
495 not_found:
496 return &phys_sections[s_index];
497 }
498
499 bool memory_region_is_unassigned(MemoryRegion *mr)
500 {
501 return mr != &io_mem_ram && mr != &io_mem_rom
502 && mr != &io_mem_notdirty && !mr->rom_device
503 && mr != &io_mem_watch;
504 }
505
506 #define mmap_lock() do { } while(0)
507 #define mmap_unlock() do { } while(0)
508 #endif
509
510 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
511
512 #if defined(CONFIG_USER_ONLY)
513 /* Currently it is not recommended to allocate big chunks of data in
514    user mode. This will change when a dedicated libc is used. */
515 #define USE_STATIC_CODE_GEN_BUFFER
516 #endif
517
518 #ifdef USE_STATIC_CODE_GEN_BUFFER
519 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
520 __attribute__((aligned (CODE_GEN_ALIGN)));
521 #endif
522
523 static void code_gen_alloc(unsigned long tb_size)
524 {
525 #ifdef USE_STATIC_CODE_GEN_BUFFER
526 code_gen_buffer = static_code_gen_buffer;
527 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
528 map_exec(code_gen_buffer, code_gen_buffer_size);
529 #else
530 code_gen_buffer_size = tb_size;
531 if (code_gen_buffer_size == 0) {
532 #if defined(CONFIG_USER_ONLY)
533 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
534 #else
535 /* XXX: needs adjustments */
536 code_gen_buffer_size = (unsigned long)(ram_size / 4);
537 #endif
538 }
539 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
540 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
541 /* The code gen buffer location may have constraints depending on
542 the host cpu and OS */
543 #if defined(__linux__)
544 {
545 int flags;
546 void *start = NULL;
547
548 flags = MAP_PRIVATE | MAP_ANONYMOUS;
549 #if defined(__x86_64__)
550 flags |= MAP_32BIT;
551 /* Cannot map more than that */
552 if (code_gen_buffer_size > (800 * 1024 * 1024))
553 code_gen_buffer_size = (800 * 1024 * 1024);
554 #elif defined(__sparc_v9__)
555 // Map the buffer below 2G, so we can use direct calls and branches
556 flags |= MAP_FIXED;
557 start = (void *) 0x60000000UL;
558 if (code_gen_buffer_size > (512 * 1024 * 1024))
559 code_gen_buffer_size = (512 * 1024 * 1024);
560 #elif defined(__arm__)
561     /* Keep the buffer no bigger than 16MB so we can branch between blocks */
562 if (code_gen_buffer_size > 16 * 1024 * 1024)
563 code_gen_buffer_size = 16 * 1024 * 1024;
564 #elif defined(__s390x__)
565 /* Map the buffer so that we can use direct calls and branches. */
566 /* We have a +- 4GB range on the branches; leave some slop. */
567 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
568 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
569 }
570 start = (void *)0x90000000UL;
571 #endif
572 code_gen_buffer = mmap(start, code_gen_buffer_size,
573 PROT_WRITE | PROT_READ | PROT_EXEC,
574 flags, -1, 0);
575 if (code_gen_buffer == MAP_FAILED) {
576 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
577 exit(1);
578 }
579 }
580 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
581 || defined(__DragonFly__) || defined(__OpenBSD__) \
582 || defined(__NetBSD__)
583 {
584 int flags;
585 void *addr = NULL;
586 flags = MAP_PRIVATE | MAP_ANONYMOUS;
587 #if defined(__x86_64__)
588 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
589 * 0x40000000 is free */
590 flags |= MAP_FIXED;
591 addr = (void *)0x40000000;
592 /* Cannot map more than that */
593 if (code_gen_buffer_size > (800 * 1024 * 1024))
594 code_gen_buffer_size = (800 * 1024 * 1024);
595 #elif defined(__sparc_v9__)
596 // Map the buffer below 2G, so we can use direct calls and branches
597 flags |= MAP_FIXED;
598 addr = (void *) 0x60000000UL;
599 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
600 code_gen_buffer_size = (512 * 1024 * 1024);
601 }
602 #endif
603 code_gen_buffer = mmap(addr, code_gen_buffer_size,
604 PROT_WRITE | PROT_READ | PROT_EXEC,
605 flags, -1, 0);
606 if (code_gen_buffer == MAP_FAILED) {
607 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
608 exit(1);
609 }
610 }
611 #else
612 code_gen_buffer = g_malloc(code_gen_buffer_size);
613 map_exec(code_gen_buffer, code_gen_buffer_size);
614 #endif
615 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
616 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
617 code_gen_buffer_max_size = code_gen_buffer_size -
618 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
619 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
620 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
621 }
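/* Descriptive note: code_gen_buffer_max_size leaves TCG_MAX_OP_SIZE *
   OPC_BUF_SIZE bytes of slack below the end of the buffer, so the TB
   currently being generated cannot run past it; code_gen_max_blocks is an
   estimate derived from CODE_GEN_AVG_BLOCK_SIZE and bounds the tbs[] array
   allocated above. */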
622
623 /* Must be called before using the QEMU cpus. 'tb_size' is the size
624 (in bytes) allocated to the translation buffer. Zero means default
625 size. */
626 void tcg_exec_init(unsigned long tb_size)
627 {
628 cpu_gen_init();
629 code_gen_alloc(tb_size);
630 code_gen_ptr = code_gen_buffer;
631 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
632 page_init();
633 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
634 /* There's no guest base to take into account, so go ahead and
635 initialize the prologue now. */
636 tcg_prologue_init(&tcg_ctx);
637 #endif
638 }
639
640 bool tcg_enabled(void)
641 {
642 return code_gen_buffer != NULL;
643 }
644
645 void cpu_exec_init_all(void)
646 {
647 #if !defined(CONFIG_USER_ONLY)
648 memory_map_init();
649 io_mem_init();
650 #endif
651 }
652
653 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
654
655 static int cpu_common_post_load(void *opaque, int version_id)
656 {
657 CPUArchState *env = opaque;
658
659 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
660 version_id is increased. */
661 env->interrupt_request &= ~0x01;
662 tlb_flush(env, 1);
663
664 return 0;
665 }
666
667 static const VMStateDescription vmstate_cpu_common = {
668 .name = "cpu_common",
669 .version_id = 1,
670 .minimum_version_id = 1,
671 .minimum_version_id_old = 1,
672 .post_load = cpu_common_post_load,
673 .fields = (VMStateField []) {
674 VMSTATE_UINT32(halted, CPUArchState),
675 VMSTATE_UINT32(interrupt_request, CPUArchState),
676 VMSTATE_END_OF_LIST()
677 }
678 };
679 #endif
680
681 CPUArchState *qemu_get_cpu(int cpu)
682 {
683 CPUArchState *env = first_cpu;
684
685 while (env) {
686 if (env->cpu_index == cpu)
687 break;
688 env = env->next_cpu;
689 }
690
691 return env;
692 }
693
694 void cpu_exec_init(CPUArchState *env)
695 {
696 CPUArchState **penv;
697 int cpu_index;
698
699 #if defined(CONFIG_USER_ONLY)
700 cpu_list_lock();
701 #endif
702 env->next_cpu = NULL;
703 penv = &first_cpu;
704 cpu_index = 0;
705 while (*penv != NULL) {
706 penv = &(*penv)->next_cpu;
707 cpu_index++;
708 }
709 env->cpu_index = cpu_index;
710 env->numa_node = 0;
711 QTAILQ_INIT(&env->breakpoints);
712 QTAILQ_INIT(&env->watchpoints);
713 #ifndef CONFIG_USER_ONLY
714 env->thread_id = qemu_get_thread_id();
715 #endif
716 *penv = env;
717 #if defined(CONFIG_USER_ONLY)
718 cpu_list_unlock();
719 #endif
720 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
721 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
722 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
723 cpu_save, cpu_load, env);
724 #endif
725 }
726
727 /* Allocate a new translation block. Flush the translation buffer if
728 too many translation blocks or too much generated code. */
729 static TranslationBlock *tb_alloc(target_ulong pc)
730 {
731 TranslationBlock *tb;
732
733 if (nb_tbs >= code_gen_max_blocks ||
734 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
735 return NULL;
736 tb = &tbs[nb_tbs++];
737 tb->pc = pc;
738 tb->cflags = 0;
739 return tb;
740 }
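/* Descriptive note: a NULL return from tb_alloc() is not an error; it means
   the caller must call tb_flush() and retry (see tb_gen_code(), which retries
   the allocation after the flush and then cannot fail). */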
741
742 void tb_free(TranslationBlock *tb)
743 {
744     /* In practice this is mostly used for single-use temporary TBs.
745        Ignore the hard cases and just back up if this TB happens to
746        be the last one generated. */
747 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
748 code_gen_ptr = tb->tc_ptr;
749 nb_tbs--;
750 }
751 }
752
753 static inline void invalidate_page_bitmap(PageDesc *p)
754 {
755 if (p->code_bitmap) {
756 g_free(p->code_bitmap);
757 p->code_bitmap = NULL;
758 }
759 p->code_write_count = 0;
760 }
761
762 /* Set the 'first_tb' field to NULL in all PageDescs. */
763
764 static void page_flush_tb_1 (int level, void **lp)
765 {
766 int i;
767
768 if (*lp == NULL) {
769 return;
770 }
771 if (level == 0) {
772 PageDesc *pd = *lp;
773 for (i = 0; i < L2_SIZE; ++i) {
774 pd[i].first_tb = NULL;
775 invalidate_page_bitmap(pd + i);
776 }
777 } else {
778 void **pp = *lp;
779 for (i = 0; i < L2_SIZE; ++i) {
780 page_flush_tb_1 (level - 1, pp + i);
781 }
782 }
783 }
784
785 static void page_flush_tb(void)
786 {
787 int i;
788 for (i = 0; i < V_L1_SIZE; i++) {
789 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
790 }
791 }
792
793 /* flush all the translation blocks */
794 /* XXX: tb_flush is currently not thread safe */
795 void tb_flush(CPUArchState *env1)
796 {
797 CPUArchState *env;
798 #if defined(DEBUG_FLUSH)
799 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
800 (unsigned long)(code_gen_ptr - code_gen_buffer),
801 nb_tbs, nb_tbs > 0 ?
802 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
803 #endif
804 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
805 cpu_abort(env1, "Internal error: code buffer overflow\n");
806
807 nb_tbs = 0;
808
809 for(env = first_cpu; env != NULL; env = env->next_cpu) {
810 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
811 }
812
813 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
814 page_flush_tb();
815
816 code_gen_ptr = code_gen_buffer;
817 /* XXX: flush processor icache at this point if cache flush is
818 expensive */
819 tb_flush_count++;
820 }
821
822 #ifdef DEBUG_TB_CHECK
823
824 static void tb_invalidate_check(target_ulong address)
825 {
826 TranslationBlock *tb;
827 int i;
828 address &= TARGET_PAGE_MASK;
829 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
830 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
831 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
832 address >= tb->pc + tb->size)) {
833 printf("ERROR invalidate: address=" TARGET_FMT_lx
834 " PC=%08lx size=%04x\n",
835 address, (long)tb->pc, tb->size);
836 }
837 }
838 }
839 }
840
841 /* verify that all the pages have correct rights for code */
842 static void tb_page_check(void)
843 {
844 TranslationBlock *tb;
845 int i, flags1, flags2;
846
847 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
848 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
849 flags1 = page_get_flags(tb->pc);
850 flags2 = page_get_flags(tb->pc + tb->size - 1);
851 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
852 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
853 (long)tb->pc, tb->size, flags1, flags2);
854 }
855 }
856 }
857 }
858
859 #endif
860
861 /* invalidate one TB */
862 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
863 int next_offset)
864 {
865 TranslationBlock *tb1;
866 for(;;) {
867 tb1 = *ptb;
868 if (tb1 == tb) {
869 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
870 break;
871 }
872 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
873 }
874 }
875
876 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
877 {
878 TranslationBlock *tb1;
879 unsigned int n1;
880
881 for(;;) {
882 tb1 = *ptb;
883 n1 = (uintptr_t)tb1 & 3;
884 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
885 if (tb1 == tb) {
886 *ptb = tb1->page_next[n1];
887 break;
888 }
889 ptb = &tb1->page_next[n1];
890 }
891 }
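/* Descriptive note on the page lists: the low two bits of each pointer in a
   PageDesc's first_tb/page_next chain encode which of the TB's two pages
   (0 or 1) the list is threaded through, which is why the pointers are masked
   with ~3 before being dereferenced. */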
892
893 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
894 {
895 TranslationBlock *tb1, **ptb;
896 unsigned int n1;
897
898 ptb = &tb->jmp_next[n];
899 tb1 = *ptb;
900 if (tb1) {
901 /* find tb(n) in circular list */
902 for(;;) {
903 tb1 = *ptb;
904 n1 = (uintptr_t)tb1 & 3;
905 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
906 if (n1 == n && tb1 == tb)
907 break;
908 if (n1 == 2) {
909 ptb = &tb1->jmp_first;
910 } else {
911 ptb = &tb1->jmp_next[n1];
912 }
913 }
914         /* now we can remove tb(n) from the list */
915 *ptb = tb->jmp_next[n];
916
917 tb->jmp_next[n] = NULL;
918 }
919 }
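/* Descriptive note on the jump lists: every TB chained to another TB sits on
   that TB's circular jmp_first list.  The low two bits of a list pointer say
   which jump slot (0 or 1) of the pointed-to TB links back; the value 2 tags
   the list head, i.e. the owning TB itself (see tb_link_page() and
   tb_phys_invalidate()). */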
920
921 /* reset the jump entry 'n' of a TB so that it is not chained to
922 another TB */
923 static inline void tb_reset_jump(TranslationBlock *tb, int n)
924 {
925 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
926 }
927
928 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
929 {
930 CPUArchState *env;
931 PageDesc *p;
932 unsigned int h, n1;
933 tb_page_addr_t phys_pc;
934 TranslationBlock *tb1, *tb2;
935
936 /* remove the TB from the hash list */
937 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
938 h = tb_phys_hash_func(phys_pc);
939 tb_remove(&tb_phys_hash[h], tb,
940 offsetof(TranslationBlock, phys_hash_next));
941
942 /* remove the TB from the page list */
943 if (tb->page_addr[0] != page_addr) {
944 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
945 tb_page_remove(&p->first_tb, tb);
946 invalidate_page_bitmap(p);
947 }
948 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
949 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
950 tb_page_remove(&p->first_tb, tb);
951 invalidate_page_bitmap(p);
952 }
953
954 tb_invalidated_flag = 1;
955
956     /* remove the TB from each CPU's tb_jmp_cache */
957 h = tb_jmp_cache_hash_func(tb->pc);
958 for(env = first_cpu; env != NULL; env = env->next_cpu) {
959 if (env->tb_jmp_cache[h] == tb)
960 env->tb_jmp_cache[h] = NULL;
961 }
962
963     /* remove this TB from the two jump lists */
964 tb_jmp_remove(tb, 0);
965 tb_jmp_remove(tb, 1);
966
967     /* reset any remaining jumps to this TB */
968 tb1 = tb->jmp_first;
969 for(;;) {
970 n1 = (uintptr_t)tb1 & 3;
971 if (n1 == 2)
972 break;
973 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
974 tb2 = tb1->jmp_next[n1];
975 tb_reset_jump(tb1, n1);
976 tb1->jmp_next[n1] = NULL;
977 tb1 = tb2;
978 }
979 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
980
981 tb_phys_invalidate_count++;
982 }
983
984 static inline void set_bits(uint8_t *tab, int start, int len)
985 {
986 int end, mask, end1;
987
988 end = start + len;
989 tab += start >> 3;
990 mask = 0xff << (start & 7);
991 if ((start & ~7) == (end & ~7)) {
992 if (start < end) {
993 mask &= ~(0xff << (end & 7));
994 *tab |= mask;
995 }
996 } else {
997 *tab++ |= mask;
998 start = (start + 8) & ~7;
999 end1 = end & ~7;
1000 while (start < end1) {
1001 *tab++ = 0xff;
1002 start += 8;
1003 }
1004 if (start < end) {
1005 mask = ~(0xff << (end & 7));
1006 *tab |= mask;
1007 }
1008 }
1009 }
1010
1011 static void build_page_bitmap(PageDesc *p)
1012 {
1013 int n, tb_start, tb_end;
1014 TranslationBlock *tb;
1015
1016 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1017
1018 tb = p->first_tb;
1019 while (tb != NULL) {
1020 n = (uintptr_t)tb & 3;
1021 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1022 /* NOTE: this is subtle as a TB may span two physical pages */
1023 if (n == 0) {
1024 /* NOTE: tb_end may be after the end of the page, but
1025 it is not a problem */
1026 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1027 tb_end = tb_start + tb->size;
1028 if (tb_end > TARGET_PAGE_SIZE)
1029 tb_end = TARGET_PAGE_SIZE;
1030 } else {
1031 tb_start = 0;
1032 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1033 }
1034 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1035 tb = tb->page_next[n];
1036 }
1037 }
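/* Descriptive note: code_bitmap has one bit per byte of the page, set for
   every byte covered by a translated block.  Once code_write_count reaches
   SMC_BITMAP_USE_THRESHOLD the bitmap is built, and
   tb_invalidate_phys_page_fast() can then skip writes that do not touch
   translated code at all. */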
1038
1039 TranslationBlock *tb_gen_code(CPUArchState *env,
1040 target_ulong pc, target_ulong cs_base,
1041 int flags, int cflags)
1042 {
1043 TranslationBlock *tb;
1044 uint8_t *tc_ptr;
1045 tb_page_addr_t phys_pc, phys_page2;
1046 target_ulong virt_page2;
1047 int code_gen_size;
1048
1049 phys_pc = get_page_addr_code(env, pc);
1050 tb = tb_alloc(pc);
1051 if (!tb) {
1052 /* flush must be done */
1053 tb_flush(env);
1054 /* cannot fail at this point */
1055 tb = tb_alloc(pc);
1056 /* Don't forget to invalidate previous TB info. */
1057 tb_invalidated_flag = 1;
1058 }
1059 tc_ptr = code_gen_ptr;
1060 tb->tc_ptr = tc_ptr;
1061 tb->cs_base = cs_base;
1062 tb->flags = flags;
1063 tb->cflags = cflags;
1064 cpu_gen_code(env, tb, &code_gen_size);
1065 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1066 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1067
1068 /* check next page if needed */
1069 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1070 phys_page2 = -1;
1071 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1072 phys_page2 = get_page_addr_code(env, virt_page2);
1073 }
1074 tb_link_page(tb, phys_pc, phys_page2);
1075 return tb;
1076 }
1077
1078 /*
1079 * invalidate all TBs which intersect with the target physical pages
1080  * starting in range [start, end). NOTE: start and end may refer to
1081 * different physical pages. 'is_cpu_write_access' should be true if called
1082 * from a real cpu write access: the virtual CPU will exit the current
1083 * TB if code is modified inside this TB.
1084 */
1085 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1086 int is_cpu_write_access)
1087 {
1088 while (start < end) {
1089 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1090 start &= TARGET_PAGE_MASK;
1091 start += TARGET_PAGE_SIZE;
1092 }
1093 }
1094
1095 /* invalidate all TBs which intersect with the target physical page
1096    starting in range [start, end). NOTE: start and end must refer to
1097 the same physical page. 'is_cpu_write_access' should be true if called
1098 from a real cpu write access: the virtual CPU will exit the current
1099 TB if code is modified inside this TB. */
1100 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1101 int is_cpu_write_access)
1102 {
1103 TranslationBlock *tb, *tb_next, *saved_tb;
1104 CPUArchState *env = cpu_single_env;
1105 tb_page_addr_t tb_start, tb_end;
1106 PageDesc *p;
1107 int n;
1108 #ifdef TARGET_HAS_PRECISE_SMC
1109 int current_tb_not_found = is_cpu_write_access;
1110 TranslationBlock *current_tb = NULL;
1111 int current_tb_modified = 0;
1112 target_ulong current_pc = 0;
1113 target_ulong current_cs_base = 0;
1114 int current_flags = 0;
1115 #endif /* TARGET_HAS_PRECISE_SMC */
1116
1117 p = page_find(start >> TARGET_PAGE_BITS);
1118 if (!p)
1119 return;
1120 if (!p->code_bitmap &&
1121 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1122 is_cpu_write_access) {
1123 /* build code bitmap */
1124 build_page_bitmap(p);
1125 }
1126
1127     /* we remove all the TBs in the range [start, end) */
1128 /* XXX: see if in some cases it could be faster to invalidate all the code */
1129 tb = p->first_tb;
1130 while (tb != NULL) {
1131 n = (uintptr_t)tb & 3;
1132 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1133 tb_next = tb->page_next[n];
1134 /* NOTE: this is subtle as a TB may span two physical pages */
1135 if (n == 0) {
1136 /* NOTE: tb_end may be after the end of the page, but
1137 it is not a problem */
1138 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1139 tb_end = tb_start + tb->size;
1140 } else {
1141 tb_start = tb->page_addr[1];
1142 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1143 }
1144 if (!(tb_end <= start || tb_start >= end)) {
1145 #ifdef TARGET_HAS_PRECISE_SMC
1146 if (current_tb_not_found) {
1147 current_tb_not_found = 0;
1148 current_tb = NULL;
1149 if (env->mem_io_pc) {
1150 /* now we have a real cpu fault */
1151 current_tb = tb_find_pc(env->mem_io_pc);
1152 }
1153 }
1154 if (current_tb == tb &&
1155 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1156 /* If we are modifying the current TB, we must stop
1157 its execution. We could be more precise by checking
1158 that the modification is after the current PC, but it
1159 would require a specialized function to partially
1160 restore the CPU state */
1161
1162 current_tb_modified = 1;
1163 cpu_restore_state(current_tb, env, env->mem_io_pc);
1164 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1165 &current_flags);
1166 }
1167 #endif /* TARGET_HAS_PRECISE_SMC */
1168 /* we need to do that to handle the case where a signal
1169 occurs while doing tb_phys_invalidate() */
1170 saved_tb = NULL;
1171 if (env) {
1172 saved_tb = env->current_tb;
1173 env->current_tb = NULL;
1174 }
1175 tb_phys_invalidate(tb, -1);
1176 if (env) {
1177 env->current_tb = saved_tb;
1178 if (env->interrupt_request && env->current_tb)
1179 cpu_interrupt(env, env->interrupt_request);
1180 }
1181 }
1182 tb = tb_next;
1183 }
1184 #if !defined(CONFIG_USER_ONLY)
1185 /* if no code remaining, no need to continue to use slow writes */
1186 if (!p->first_tb) {
1187 invalidate_page_bitmap(p);
1188 if (is_cpu_write_access) {
1189 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1190 }
1191 }
1192 #endif
1193 #ifdef TARGET_HAS_PRECISE_SMC
1194 if (current_tb_modified) {
1195 /* we generate a block containing just the instruction
1196 modifying the memory. It will ensure that it cannot modify
1197 itself */
1198 env->current_tb = NULL;
1199 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1200 cpu_resume_from_signal(env, NULL);
1201 }
1202 #endif
1203 }
1204
1205 /* len must be <= 8 and start must be a multiple of len */
1206 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1207 {
1208 PageDesc *p;
1209 int offset, b;
1210 #if 0
1211 if (1) {
1212 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1213 cpu_single_env->mem_io_vaddr, len,
1214 cpu_single_env->eip,
1215 cpu_single_env->eip +
1216 (intptr_t)cpu_single_env->segs[R_CS].base);
1217 }
1218 #endif
1219 p = page_find(start >> TARGET_PAGE_BITS);
1220 if (!p)
1221 return;
1222 if (p->code_bitmap) {
1223 offset = start & ~TARGET_PAGE_MASK;
1224 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1225 if (b & ((1 << len) - 1))
1226 goto do_invalidate;
1227 } else {
1228 do_invalidate:
1229 tb_invalidate_phys_page_range(start, start + len, 1);
1230 }
1231 }
1232
1233 #if !defined(CONFIG_SOFTMMU)
1234 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1235 uintptr_t pc, void *puc)
1236 {
1237 TranslationBlock *tb;
1238 PageDesc *p;
1239 int n;
1240 #ifdef TARGET_HAS_PRECISE_SMC
1241 TranslationBlock *current_tb = NULL;
1242 CPUArchState *env = cpu_single_env;
1243 int current_tb_modified = 0;
1244 target_ulong current_pc = 0;
1245 target_ulong current_cs_base = 0;
1246 int current_flags = 0;
1247 #endif
1248
1249 addr &= TARGET_PAGE_MASK;
1250 p = page_find(addr >> TARGET_PAGE_BITS);
1251 if (!p)
1252 return;
1253 tb = p->first_tb;
1254 #ifdef TARGET_HAS_PRECISE_SMC
1255 if (tb && pc != 0) {
1256 current_tb = tb_find_pc(pc);
1257 }
1258 #endif
1259 while (tb != NULL) {
1260 n = (uintptr_t)tb & 3;
1261 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1262 #ifdef TARGET_HAS_PRECISE_SMC
1263 if (current_tb == tb &&
1264 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1265 /* If we are modifying the current TB, we must stop
1266 its execution. We could be more precise by checking
1267 that the modification is after the current PC, but it
1268 would require a specialized function to partially
1269 restore the CPU state */
1270
1271 current_tb_modified = 1;
1272 cpu_restore_state(current_tb, env, pc);
1273 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1274 &current_flags);
1275 }
1276 #endif /* TARGET_HAS_PRECISE_SMC */
1277 tb_phys_invalidate(tb, addr);
1278 tb = tb->page_next[n];
1279 }
1280 p->first_tb = NULL;
1281 #ifdef TARGET_HAS_PRECISE_SMC
1282 if (current_tb_modified) {
1283 /* we generate a block containing just the instruction
1284 modifying the memory. It will ensure that it cannot modify
1285 itself */
1286 env->current_tb = NULL;
1287 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1288 cpu_resume_from_signal(env, puc);
1289 }
1290 #endif
1291 }
1292 #endif
1293
1294 /* add the tb in the target page and protect it if necessary */
1295 static inline void tb_alloc_page(TranslationBlock *tb,
1296 unsigned int n, tb_page_addr_t page_addr)
1297 {
1298 PageDesc *p;
1299 #ifndef CONFIG_USER_ONLY
1300 bool page_already_protected;
1301 #endif
1302
1303 tb->page_addr[n] = page_addr;
1304 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1305 tb->page_next[n] = p->first_tb;
1306 #ifndef CONFIG_USER_ONLY
1307 page_already_protected = p->first_tb != NULL;
1308 #endif
1309 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1310 invalidate_page_bitmap(p);
1311
1312 #if defined(TARGET_HAS_SMC) || 1
1313
1314 #if defined(CONFIG_USER_ONLY)
1315 if (p->flags & PAGE_WRITE) {
1316 target_ulong addr;
1317 PageDesc *p2;
1318 int prot;
1319
1320         /* force the host page to be non-writable (writes will take a
1321            page fault + mprotect overhead) */
1322 page_addr &= qemu_host_page_mask;
1323 prot = 0;
1324 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1325 addr += TARGET_PAGE_SIZE) {
1326
1327 p2 = page_find (addr >> TARGET_PAGE_BITS);
1328 if (!p2)
1329 continue;
1330 prot |= p2->flags;
1331 p2->flags &= ~PAGE_WRITE;
1332 }
1333 mprotect(g2h(page_addr), qemu_host_page_size,
1334 (prot & PAGE_BITS) & ~PAGE_WRITE);
1335 #ifdef DEBUG_TB_INVALIDATE
1336 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1337 page_addr);
1338 #endif
1339 }
1340 #else
1341 /* if some code is already present, then the pages are already
1342 protected. So we handle the case where only the first TB is
1343 allocated in a physical page */
1344 if (!page_already_protected) {
1345 tlb_protect_code(page_addr);
1346 }
1347 #endif
1348
1349 #endif /* TARGET_HAS_SMC */
1350 }
1351
1352 /* add a new TB and link it to the physical page tables. phys_page2 is
1353 (-1) to indicate that only one page contains the TB. */
1354 void tb_link_page(TranslationBlock *tb,
1355 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1356 {
1357 unsigned int h;
1358 TranslationBlock **ptb;
1359
1360 /* Grab the mmap lock to stop another thread invalidating this TB
1361 before we are done. */
1362 mmap_lock();
1363 /* add in the physical hash table */
1364 h = tb_phys_hash_func(phys_pc);
1365 ptb = &tb_phys_hash[h];
1366 tb->phys_hash_next = *ptb;
1367 *ptb = tb;
1368
1369 /* add in the page list */
1370 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1371 if (phys_page2 != -1)
1372 tb_alloc_page(tb, 1, phys_page2);
1373 else
1374 tb->page_addr[1] = -1;
1375
1376 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1377 tb->jmp_next[0] = NULL;
1378 tb->jmp_next[1] = NULL;
1379
1380 /* init original jump addresses */
1381 if (tb->tb_next_offset[0] != 0xffff)
1382 tb_reset_jump(tb, 0);
1383 if (tb->tb_next_offset[1] != 0xffff)
1384 tb_reset_jump(tb, 1);
1385
1386 #ifdef DEBUG_TB_CHECK
1387 tb_page_check();
1388 #endif
1389 mmap_unlock();
1390 }
1391
1392 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1393 tb[1].tc_ptr. Return NULL if not found */
1394 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1395 {
1396 int m_min, m_max, m;
1397 uintptr_t v;
1398 TranslationBlock *tb;
1399
1400 if (nb_tbs <= 0)
1401 return NULL;
1402 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1403 tc_ptr >= (uintptr_t)code_gen_ptr) {
1404 return NULL;
1405 }
1406 /* binary search (cf Knuth) */
1407 m_min = 0;
1408 m_max = nb_tbs - 1;
1409 while (m_min <= m_max) {
1410 m = (m_min + m_max) >> 1;
1411 tb = &tbs[m];
1412 v = (uintptr_t)tb->tc_ptr;
1413 if (v == tc_ptr)
1414 return tb;
1415 else if (tc_ptr < v) {
1416 m_max = m - 1;
1417 } else {
1418 m_min = m + 1;
1419 }
1420 }
1421 return &tbs[m_max];
1422 }
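/* Descriptive note: the binary search works because TBs are carved linearly
   out of code_gen_buffer, so tbs[0..nb_tbs) have monotonically increasing
   tc_ptr values; the function returns the TB whose generated code contains
   tc_ptr, i.e. the greatest tc_ptr less than or equal to the argument. */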
1423
1424 static void tb_reset_jump_recursive(TranslationBlock *tb);
1425
1426 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1427 {
1428 TranslationBlock *tb1, *tb_next, **ptb;
1429 unsigned int n1;
1430
1431 tb1 = tb->jmp_next[n];
1432 if (tb1 != NULL) {
1433 /* find head of list */
1434 for(;;) {
1435 n1 = (uintptr_t)tb1 & 3;
1436 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1437 if (n1 == 2)
1438 break;
1439 tb1 = tb1->jmp_next[n1];
1440 }
1441         /* we are now sure that tb jumps to tb1 */
1442 tb_next = tb1;
1443
1444 /* remove tb from the jmp_first list */
1445 ptb = &tb_next->jmp_first;
1446 for(;;) {
1447 tb1 = *ptb;
1448 n1 = (uintptr_t)tb1 & 3;
1449 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1450 if (n1 == n && tb1 == tb)
1451 break;
1452 ptb = &tb1->jmp_next[n1];
1453 }
1454 *ptb = tb->jmp_next[n];
1455 tb->jmp_next[n] = NULL;
1456
1457         /* remove the jump to the next tb in the generated code */
1458 tb_reset_jump(tb, n);
1459
1460         /* recursively reset jumps in the tb to which we could have jumped */
1461 tb_reset_jump_recursive(tb_next);
1462 }
1463 }
1464
1465 static void tb_reset_jump_recursive(TranslationBlock *tb)
1466 {
1467 tb_reset_jump_recursive2(tb, 0);
1468 tb_reset_jump_recursive2(tb, 1);
1469 }
1470
1471 #if defined(TARGET_HAS_ICE)
1472 #if defined(CONFIG_USER_ONLY)
1473 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1474 {
1475 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1476 }
1477 #else
1478 void tb_invalidate_phys_addr(target_phys_addr_t addr)
1479 {
1480 ram_addr_t ram_addr;
1481 MemoryRegionSection *section;
1482
1483 section = phys_page_find(addr >> TARGET_PAGE_BITS);
1484 if (!(memory_region_is_ram(section->mr)
1485 || (section->mr->rom_device && section->mr->readable))) {
1486 return;
1487 }
1488 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1489 + memory_region_section_addr(section, addr);
1490 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1491 }
1492
1493 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1494 {
1495 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1496 (pc & ~TARGET_PAGE_MASK));
1497 }
1498 #endif
1499 #endif /* TARGET_HAS_ICE */
1500
1501 #if defined(CONFIG_USER_ONLY)
1502 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1503
1504 {
1505 }
1506
1507 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1508 int flags, CPUWatchpoint **watchpoint)
1509 {
1510 return -ENOSYS;
1511 }
1512 #else
1513 /* Add a watchpoint. */
1514 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1515 int flags, CPUWatchpoint **watchpoint)
1516 {
1517 target_ulong len_mask = ~(len - 1);
1518 CPUWatchpoint *wp;
1519
1520 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1521 if ((len & (len - 1)) || (addr & ~len_mask) ||
1522 len == 0 || len > TARGET_PAGE_SIZE) {
1523 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1524 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1525 return -EINVAL;
1526 }
1527 wp = g_malloc(sizeof(*wp));
1528
1529 wp->vaddr = addr;
1530 wp->len_mask = len_mask;
1531 wp->flags = flags;
1532
1533 /* keep all GDB-injected watchpoints in front */
1534 if (flags & BP_GDB)
1535 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1536 else
1537 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1538
1539 tlb_flush_page(env, addr);
1540
1541 if (watchpoint)
1542 *watchpoint = wp;
1543 return 0;
1544 }
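/* Descriptive note: watchpoint matching compares addresses under
   len_mask = ~(len - 1), which is why the checks above insist on a
   power-of-two length and an address aligned to that length. */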
1545
1546 /* Remove a specific watchpoint. */
1547 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1548 int flags)
1549 {
1550 target_ulong len_mask = ~(len - 1);
1551 CPUWatchpoint *wp;
1552
1553 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1554 if (addr == wp->vaddr && len_mask == wp->len_mask
1555 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1556 cpu_watchpoint_remove_by_ref(env, wp);
1557 return 0;
1558 }
1559 }
1560 return -ENOENT;
1561 }
1562
1563 /* Remove a specific watchpoint by reference. */
1564 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1565 {
1566 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1567
1568 tlb_flush_page(env, watchpoint->vaddr);
1569
1570 g_free(watchpoint);
1571 }
1572
1573 /* Remove all matching watchpoints. */
1574 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1575 {
1576 CPUWatchpoint *wp, *next;
1577
1578 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1579 if (wp->flags & mask)
1580 cpu_watchpoint_remove_by_ref(env, wp);
1581 }
1582 }
1583 #endif
1584
1585 /* Add a breakpoint. */
1586 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1587 CPUBreakpoint **breakpoint)
1588 {
1589 #if defined(TARGET_HAS_ICE)
1590 CPUBreakpoint *bp;
1591
1592 bp = g_malloc(sizeof(*bp));
1593
1594 bp->pc = pc;
1595 bp->flags = flags;
1596
1597 /* keep all GDB-injected breakpoints in front */
1598 if (flags & BP_GDB)
1599 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1600 else
1601 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1602
1603 breakpoint_invalidate(env, pc);
1604
1605 if (breakpoint)
1606 *breakpoint = bp;
1607 return 0;
1608 #else
1609 return -ENOSYS;
1610 #endif
1611 }
1612
1613 /* Remove a specific breakpoint. */
1614 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1615 {
1616 #if defined(TARGET_HAS_ICE)
1617 CPUBreakpoint *bp;
1618
1619 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1620 if (bp->pc == pc && bp->flags == flags) {
1621 cpu_breakpoint_remove_by_ref(env, bp);
1622 return 0;
1623 }
1624 }
1625 return -ENOENT;
1626 #else
1627 return -ENOSYS;
1628 #endif
1629 }
1630
1631 /* Remove a specific breakpoint by reference. */
1632 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1633 {
1634 #if defined(TARGET_HAS_ICE)
1635 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1636
1637 breakpoint_invalidate(env, breakpoint->pc);
1638
1639 g_free(breakpoint);
1640 #endif
1641 }
1642
1643 /* Remove all matching breakpoints. */
1644 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1645 {
1646 #if defined(TARGET_HAS_ICE)
1647 CPUBreakpoint *bp, *next;
1648
1649 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1650 if (bp->flags & mask)
1651 cpu_breakpoint_remove_by_ref(env, bp);
1652 }
1653 #endif
1654 }
1655
1656 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1657 CPU loop after each instruction */
1658 void cpu_single_step(CPUArchState *env, int enabled)
1659 {
1660 #if defined(TARGET_HAS_ICE)
1661 if (env->singlestep_enabled != enabled) {
1662 env->singlestep_enabled = enabled;
1663 if (kvm_enabled())
1664 kvm_update_guest_debug(env, 0);
1665 else {
1666 /* must flush all the translated code to avoid inconsistencies */
1667 /* XXX: only flush what is necessary */
1668 tb_flush(env);
1669 }
1670 }
1671 #endif
1672 }
1673
1674 /* enable or disable low-level logging */
1675 void cpu_set_log(int log_flags)
1676 {
1677 loglevel = log_flags;
1678 if (loglevel && !logfile) {
1679 logfile = fopen(logfilename, log_append ? "a" : "w");
1680 if (!logfile) {
1681 perror(logfilename);
1682 _exit(1);
1683 }
1684 #if !defined(CONFIG_SOFTMMU)
1685         /* avoid glibc's use of mmap() by setting a buffer "by hand" */
1686 {
1687 static char logfile_buf[4096];
1688 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1689 }
1690 #elif defined(_WIN32)
1691 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1692 setvbuf(logfile, NULL, _IONBF, 0);
1693 #else
1694 setvbuf(logfile, NULL, _IOLBF, 0);
1695 #endif
1696 log_append = 1;
1697 }
1698 if (!loglevel && logfile) {
1699 fclose(logfile);
1700 logfile = NULL;
1701 }
1702 }
1703
1704 void cpu_set_log_filename(const char *filename)
1705 {
1706 logfilename = strdup(filename);
1707 if (logfile) {
1708 fclose(logfile);
1709 logfile = NULL;
1710 }
1711 cpu_set_log(loglevel);
1712 }
1713
1714 static void cpu_unlink_tb(CPUArchState *env)
1715 {
1716 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1717 problem and hope the cpu will stop of its own accord. For userspace
1718        emulation this often isn't as bad as it sounds, since signals are
1719        mostly used to interrupt blocking syscalls. */
1720 TranslationBlock *tb;
1721 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1722
1723 spin_lock(&interrupt_lock);
1724 tb = env->current_tb;
1725 /* if the cpu is currently executing code, we must unlink it and
1726 all the potentially executing TB */
1727 if (tb) {
1728 env->current_tb = NULL;
1729 tb_reset_jump_recursive(tb);
1730 }
1731 spin_unlock(&interrupt_lock);
1732 }
1733
1734 #ifndef CONFIG_USER_ONLY
1735 /* mask must never be zero, except for A20 change call */
1736 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1737 {
1738 int old_mask;
1739
1740 old_mask = env->interrupt_request;
1741 env->interrupt_request |= mask;
1742
1743 /*
1744 * If called from iothread context, wake the target cpu in
1745      * case it's halted.
1746 */
1747 if (!qemu_cpu_is_self(env)) {
1748 qemu_cpu_kick(env);
1749 return;
1750 }
1751
1752 if (use_icount) {
1753 env->icount_decr.u16.high = 0xffff;
1754 if (!can_do_io(env)
1755 && (mask & ~old_mask) != 0) {
1756 cpu_abort(env, "Raised interrupt while not in I/O function");
1757 }
1758 } else {
1759 cpu_unlink_tb(env);
1760 }
1761 }
1762
1763 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1764
1765 #else /* CONFIG_USER_ONLY */
1766
1767 void cpu_interrupt(CPUArchState *env, int mask)
1768 {
1769 env->interrupt_request |= mask;
1770 cpu_unlink_tb(env);
1771 }
1772 #endif /* CONFIG_USER_ONLY */
1773
1774 void cpu_reset_interrupt(CPUArchState *env, int mask)
1775 {
1776 env->interrupt_request &= ~mask;
1777 }
1778
1779 void cpu_exit(CPUArchState *env)
1780 {
1781 env->exit_request = 1;
1782 cpu_unlink_tb(env);
1783 }
1784
1785 const CPULogItem cpu_log_items[] = {
1786 { CPU_LOG_TB_OUT_ASM, "out_asm",
1787 "show generated host assembly code for each compiled TB" },
1788 { CPU_LOG_TB_IN_ASM, "in_asm",
1789 "show target assembly code for each compiled TB" },
1790 { CPU_LOG_TB_OP, "op",
1791 "show micro ops for each compiled TB" },
1792 { CPU_LOG_TB_OP_OPT, "op_opt",
1793 "show micro ops "
1794 #ifdef TARGET_I386
1795 "before eflags optimization and "
1796 #endif
1797 "after liveness analysis" },
1798 { CPU_LOG_INT, "int",
1799 "show interrupts/exceptions in short format" },
1800 { CPU_LOG_EXEC, "exec",
1801 "show trace before each executed TB (lots of logs)" },
1802 { CPU_LOG_TB_CPU, "cpu",
1803 "show CPU state before block translation" },
1804 #ifdef TARGET_I386
1805 { CPU_LOG_PCALL, "pcall",
1806 "show protected mode far calls/returns/exceptions" },
1807 { CPU_LOG_RESET, "cpu_reset",
1808 "show CPU state before CPU resets" },
1809 #endif
1810 #ifdef DEBUG_IOPORT
1811 { CPU_LOG_IOPORT, "ioport",
1812       "show all I/O port accesses" },
1813 #endif
1814 { 0, NULL, NULL },
1815 };
1816
1817 static int cmp1(const char *s1, int n, const char *s2)
1818 {
1819 if (strlen(s2) != n)
1820 return 0;
1821 return memcmp(s1, s2, n) == 0;
1822 }
1823
1824 /* takes a comma-separated list of log masks. Returns 0 on error. */
1825 int cpu_str_to_log_mask(const char *str)
1826 {
1827 const CPULogItem *item;
1828 int mask;
1829 const char *p, *p1;
1830
1831 p = str;
1832 mask = 0;
1833 for(;;) {
1834 p1 = strchr(p, ',');
1835 if (!p1)
1836 p1 = p + strlen(p);
1837 if(cmp1(p,p1-p,"all")) {
1838 for(item = cpu_log_items; item->mask != 0; item++) {
1839 mask |= item->mask;
1840 }
1841 } else {
1842 for(item = cpu_log_items; item->mask != 0; item++) {
1843 if (cmp1(p, p1 - p, item->name))
1844 goto found;
1845 }
1846 return 0;
1847 }
1848 found:
1849 mask |= item->mask;
1850 if (*p1 != ',')
1851 break;
1852 p = p1 + 1;
1853 }
1854 return mask;
1855 }
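/* Usage note: the string is a comma-separated list of the names defined in
   cpu_log_items above, e.g. "in_asm,op,exec", or "all" to enable every item;
   an unknown name makes the whole call return 0. */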
1856
1857 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1858 {
1859 va_list ap;
1860 va_list ap2;
1861
1862 va_start(ap, fmt);
1863 va_copy(ap2, ap);
1864 fprintf(stderr, "qemu: fatal: ");
1865 vfprintf(stderr, fmt, ap);
1866 fprintf(stderr, "\n");
1867 #ifdef TARGET_I386
1868 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1869 #else
1870 cpu_dump_state(env, stderr, fprintf, 0);
1871 #endif
1872 if (qemu_log_enabled()) {
1873 qemu_log("qemu: fatal: ");
1874 qemu_log_vprintf(fmt, ap2);
1875 qemu_log("\n");
1876 #ifdef TARGET_I386
1877 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1878 #else
1879 log_cpu_state(env, 0);
1880 #endif
1881 qemu_log_flush();
1882 qemu_log_close();
1883 }
1884 va_end(ap2);
1885 va_end(ap);
1886 #if defined(CONFIG_USER_ONLY)
1887 {
1888 struct sigaction act;
1889 sigfillset(&act.sa_mask);
1890 act.sa_handler = SIG_DFL;
1891 sigaction(SIGABRT, &act, NULL);
1892 }
1893 #endif
1894 abort();
1895 }
1896
1897 CPUArchState *cpu_copy(CPUArchState *env)
1898 {
1899 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1900 CPUArchState *next_cpu = new_env->next_cpu;
1901 int cpu_index = new_env->cpu_index;
1902 #if defined(TARGET_HAS_ICE)
1903 CPUBreakpoint *bp;
1904 CPUWatchpoint *wp;
1905 #endif
1906
1907 memcpy(new_env, env, sizeof(CPUArchState));
1908
1909 /* Preserve chaining and index. */
1910 new_env->next_cpu = next_cpu;
1911 new_env->cpu_index = cpu_index;
1912
1913 /* Clone all break/watchpoints.
1914 Note: Once we support ptrace with hw-debug register access, make sure
1915 BP_CPU break/watchpoints are handled correctly on clone. */
1916 QTAILQ_INIT(&env->breakpoints);
1917 QTAILQ_INIT(&env->watchpoints);
1918 #if defined(TARGET_HAS_ICE)
1919 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1920 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1921 }
1922 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1923 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1924 wp->flags, NULL);
1925 }
1926 #endif
1927
1928 return new_env;
1929 }
1930
1931 #if !defined(CONFIG_USER_ONLY)
1932 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1933 {
1934 unsigned int i;
1935
1936 /* Discard jump cache entries for any tb which might potentially
1937 overlap the flushed page. */
1938 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1939 memset (&env->tb_jmp_cache[i], 0,
1940 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1941
1942 i = tb_jmp_cache_hash_page(addr);
1943 memset (&env->tb_jmp_cache[i], 0,
1944 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1945 }
1946
1947 /* Note: start and end must be within the same ram block. */
1948 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1949 int dirty_flags)
1950 {
1951 uintptr_t length, start1;
1952
1953 start &= TARGET_PAGE_MASK;
1954 end = TARGET_PAGE_ALIGN(end);
1955
1956 length = end - start;
1957 if (length == 0)
1958 return;
1959 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1960
1961 /* we modify the TLB cache so that the dirty bit will be set again
1962 when accessing the range */
1963 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1964 /* Check that we don't span multiple blocks - this breaks the
1965 address comparisons below. */
1966 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1967 != (end - 1) - start) {
1968 abort();
1969 }
1970 cpu_tlb_reset_dirty_all(start1, length);
1971 }
1972
1973 int cpu_physical_memory_set_dirty_tracking(int enable)
1974 {
1975 int ret = 0;
1976 in_migration = enable;
1977 return ret;
1978 }
1979
1980 target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
1981 MemoryRegionSection *section,
1982 target_ulong vaddr,
1983 target_phys_addr_t paddr,
1984 int prot,
1985 target_ulong *address)
1986 {
1987 target_phys_addr_t iotlb;
1988 CPUWatchpoint *wp;
1989
1990 if (memory_region_is_ram(section->mr)) {
1991 /* Normal RAM. */
1992 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1993 + memory_region_section_addr(section, paddr);
1994 if (!section->readonly) {
1995 iotlb |= phys_section_notdirty;
1996 } else {
1997 iotlb |= phys_section_rom;
1998 }
1999 } else {
2000 /* IO handlers are currently passed a physical address.
2001 It would be nice to pass an offset from the base address
2002 of that region. This would avoid having to special case RAM,
2003 and avoid full address decoding in every device.
2004 We can't use the high bits of pd for this because
2005 IO_MEM_ROMD uses these as a ram address. */
2006 iotlb = section - phys_sections;
2007 iotlb += memory_region_section_addr(section, paddr);
2008 }
2009
2010 /* Make accesses to pages with watchpoints go via the
2011 watchpoint trap routines. */
2012 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2013 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2014 /* Avoid trapping reads of pages with a write breakpoint. */
2015 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2016 iotlb = phys_section_watch + paddr;
2017 *address |= TLB_MMIO;
2018 break;
2019 }
2020 }
2021 }
2022
2023 return iotlb;
2024 }
2025
2026 #else
2027 /*
2028 * Walks guest process memory "regions" one by one
2029 * and calls callback function 'fn' for each region.
2030 */
2031
2032 struct walk_memory_regions_data
2033 {
2034 walk_memory_regions_fn fn;
2035 void *priv;
2036 uintptr_t start;
2037 int prot;
2038 };
2039
2040 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2041 abi_ulong end, int new_prot)
2042 {
2043 if (data->start != -1ul) {
2044 int rc = data->fn(data->priv, data->start, end, data->prot);
2045 if (rc != 0) {
2046 return rc;
2047 }
2048 }
2049
2050 data->start = (new_prot ? end : -1ul);
2051 data->prot = new_prot;
2052
2053 return 0;
2054 }
2055
2056 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2057 abi_ulong base, int level, void **lp)
2058 {
2059 abi_ulong pa;
2060 int i, rc;
2061
2062 if (*lp == NULL) {
2063 return walk_memory_regions_end(data, base, 0);
2064 }
2065
2066 if (level == 0) {
2067 PageDesc *pd = *lp;
2068 for (i = 0; i < L2_SIZE; ++i) {
2069 int prot = pd[i].flags;
2070
2071 pa = base | (i << TARGET_PAGE_BITS);
2072 if (prot != data->prot) {
2073 rc = walk_memory_regions_end(data, pa, prot);
2074 if (rc != 0) {
2075 return rc;
2076 }
2077 }
2078 }
2079 } else {
2080 void **pp = *lp;
2081 for (i = 0; i < L2_SIZE; ++i) {
2082 pa = base | ((abi_ulong)i <<
2083 (TARGET_PAGE_BITS + L2_BITS * level));
2084 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2085 if (rc != 0) {
2086 return rc;
2087 }
2088 }
2089 }
2090
2091 return 0;
2092 }
2093
2094 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2095 {
2096 struct walk_memory_regions_data data;
2097 uintptr_t i;
2098
2099 data.fn = fn;
2100 data.priv = priv;
2101 data.start = -1ul;
2102 data.prot = 0;
2103
2104 for (i = 0; i < V_L1_SIZE; i++) {
2105 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2106 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2107 if (rc != 0) {
2108 return rc;
2109 }
2110 }
2111
2112 return walk_memory_regions_end(&data, 0, 0);
2113 }
2114
2115 static int dump_region(void *priv, abi_ulong start,
2116 abi_ulong end, unsigned long prot)
2117 {
2118 FILE *f = (FILE *)priv;
2119
2120 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2121 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2122 start, end, end - start,
2123 ((prot & PAGE_READ) ? 'r' : '-'),
2124 ((prot & PAGE_WRITE) ? 'w' : '-'),
2125 ((prot & PAGE_EXEC) ? 'x' : '-'));
2126
2127 return (0);
2128 }
2129
2130 /* dump memory mappings */
2131 void page_dump(FILE *f)
2132 {
2133 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2134 "start", "end", "size", "prot");
2135 walk_memory_regions(f, dump_region);
2136 }
2137
2138 int page_get_flags(target_ulong address)
2139 {
2140 PageDesc *p;
2141
2142 p = page_find(address >> TARGET_PAGE_BITS);
2143 if (!p)
2144 return 0;
2145 return p->flags;
2146 }
2147
2148 /* Modify the flags of a page and invalidate the code if necessary.
2149 The flag PAGE_WRITE_ORG is set automatically depending
2150 on PAGE_WRITE. The mmap_lock should already be held. */
2151 void page_set_flags(target_ulong start, target_ulong end, int flags)
2152 {
2153 target_ulong addr, len;
2154
2155 /* This function should never be called with addresses outside the
2156 guest address space. If this assert fires, it probably indicates
2157 a missing call to h2g_valid. */
2158 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2159 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2160 #endif
2161 assert(start < end);
2162
2163 start = start & TARGET_PAGE_MASK;
2164 end = TARGET_PAGE_ALIGN(end);
2165
2166 if (flags & PAGE_WRITE) {
2167 flags |= PAGE_WRITE_ORG;
2168 }
2169
2170 for (addr = start, len = end - start;
2171 len != 0;
2172 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2173 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2174
2175 /* If the page is being made writable and it contains translated
2176 code, that code must be invalidated first. */
2177 if (!(p->flags & PAGE_WRITE) &&
2178 (flags & PAGE_WRITE) &&
2179 p->first_tb) {
2180 tb_invalidate_phys_page(addr, 0, NULL);
2181 }
2182 p->flags = flags;
2183 }
2184 }
2185
2186 int page_check_range(target_ulong start, target_ulong len, int flags)
2187 {
2188 PageDesc *p;
2189 target_ulong end;
2190 target_ulong addr;
2191
2192 /* This function should never be called with addresses outside the
2193 guest address space. If this assert fires, it probably indicates
2194 a missing call to h2g_valid. */
2195 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2196 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2197 #endif
2198
2199 if (len == 0) {
2200 return 0;
2201 }
2202 if (start + len - 1 < start) {
2203 /* We've wrapped around. */
2204 return -1;
2205 }
2206
2207 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2208 start = start & TARGET_PAGE_MASK;
2209
2210 for (addr = start, len = end - start;
2211 len != 0;
2212 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2213 p = page_find(addr >> TARGET_PAGE_BITS);
2214 if (!p)
2215 return -1;
2216 if (!(p->flags & PAGE_VALID))
2217 return -1;
2218
2219 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2220 return -1;
2221 if (flags & PAGE_WRITE) {
2222 if (!(p->flags & PAGE_WRITE_ORG))
2223 return -1;
2224 /* unprotect the page if it was put read-only because it
2225 contains translated code */
2226 if (!(p->flags & PAGE_WRITE)) {
2227 if (!page_unprotect(addr, 0, NULL))
2228 return -1;
2229 }
2230 return 0;
2231 }
2232 }
2233 return 0;
2234 }
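
/* Illustrative sketch (assumptions noted): a user-mode syscall helper
 * could use page_check_range() to validate a guest buffer before
 * touching it, roughly:
 *
 *     if (page_check_range(guest_addr, len, PAGE_READ) < 0) {
 *         return -TARGET_EFAULT;   // hypothetical error path
 *     }
 *     // then lock_user()/memcpy()/unlock_user(), as in cpu_memory_rw_debug()
 *
 * guest_addr, len and the TARGET_EFAULT return are placeholders here.
 */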
2235
2236 /* called from signal handler: invalidate the code and unprotect the
2237 page. Return TRUE if the fault was successfully handled. */
2238 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2239 {
2240 unsigned int prot;
2241 PageDesc *p;
2242 target_ulong host_start, host_end, addr;
2243
2244 /* Technically this isn't safe inside a signal handler. However we
2245 know this only ever happens in a synchronous SEGV handler, so in
2246 practice it seems to be ok. */
2247 mmap_lock();
2248
2249 p = page_find(address >> TARGET_PAGE_BITS);
2250 if (!p) {
2251 mmap_unlock();
2252 return 0;
2253 }
2254
2255 /* if the page was originally writable (PAGE_WRITE_ORG) but is now
2256 write-protected, restore write access */
2257 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2258 host_start = address & qemu_host_page_mask;
2259 host_end = host_start + qemu_host_page_size;
2260
2261 prot = 0;
2262 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2263 p = page_find(addr >> TARGET_PAGE_BITS);
2264 p->flags |= PAGE_WRITE;
2265 prot |= p->flags;
2266
2267 /* and since the content will be modified, we must invalidate
2268 the corresponding translated code. */
2269 tb_invalidate_phys_page(addr, pc, puc);
2270 #ifdef DEBUG_TB_CHECK
2271 tb_invalidate_check(addr);
2272 #endif
2273 }
2274 mprotect((void *)g2h(host_start), qemu_host_page_size,
2275 prot & PAGE_BITS);
2276
2277 mmap_unlock();
2278 return 1;
2279 }
2280 mmap_unlock();
2281 return 0;
2282 }
2283 #endif /* defined(CONFIG_USER_ONLY) */
2284
2285 #if !defined(CONFIG_USER_ONLY)
2286
2287 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2288 typedef struct subpage_t {
2289 MemoryRegion iomem;
2290 target_phys_addr_t base;
2291 uint16_t sub_section[TARGET_PAGE_SIZE];
2292 } subpage_t;
2293
2294 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2295 uint16_t section);
2296 static subpage_t *subpage_init(target_phys_addr_t base);
2297 static void destroy_page_desc(uint16_t section_index)
2298 {
2299 MemoryRegionSection *section = &phys_sections[section_index];
2300 MemoryRegion *mr = section->mr;
2301
2302 if (mr->subpage) {
2303 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2304 memory_region_destroy(&subpage->iomem);
2305 g_free(subpage);
2306 }
2307 }
2308
2309 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2310 {
2311 unsigned i;
2312 PhysPageEntry *p;
2313
2314 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2315 return;
2316 }
2317
2318 p = phys_map_nodes[lp->ptr];
2319 for (i = 0; i < L2_SIZE; ++i) {
2320 if (!p[i].is_leaf) {
2321 destroy_l2_mapping(&p[i], level - 1);
2322 } else {
2323 destroy_page_desc(p[i].ptr);
2324 }
2325 }
2326 lp->is_leaf = 0;
2327 lp->ptr = PHYS_MAP_NODE_NIL;
2328 }
2329
2330 static void destroy_all_mappings(void)
2331 {
2332 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2333 phys_map_nodes_reset();
2334 }
2335
2336 static uint16_t phys_section_add(MemoryRegionSection *section)
2337 {
2338 if (phys_sections_nb == phys_sections_nb_alloc) {
2339 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2340 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2341 phys_sections_nb_alloc);
2342 }
2343 phys_sections[phys_sections_nb] = *section;
2344 return phys_sections_nb++;
2345 }
2346
2347 static void phys_sections_clear(void)
2348 {
2349 phys_sections_nb = 0;
2350 }
2351
2352 /* Register a MemoryRegionSection in the physical page map.
2353 Sections whose start or size is not aligned to the target page
2354 size are routed through the subpage machinery (register_subpage);
2355 fully page-aligned spans are entered directly by
2356 register_multipage(). cpu_register_physical_memory_log() below
2357 splits an arbitrary section into at most one leading subpage,
2358 a page-aligned middle part, and one trailing subpage, and
2359 registers each piece accordingly. */
2360 static void register_subpage(MemoryRegionSection *section)
2361 {
2362 subpage_t *subpage;
2363 target_phys_addr_t base = section->offset_within_address_space
2364 & TARGET_PAGE_MASK;
2365 MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2366 MemoryRegionSection subsection = {
2367 .offset_within_address_space = base,
2368 .size = TARGET_PAGE_SIZE,
2369 };
2370 target_phys_addr_t start, end;
2371
2372 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2373
2374 if (!(existing->mr->subpage)) {
2375 subpage = subpage_init(base);
2376 subsection.mr = &subpage->iomem;
2377 phys_page_set(base >> TARGET_PAGE_BITS, 1,
2378 phys_section_add(&subsection));
2379 } else {
2380 subpage = container_of(existing->mr, subpage_t, iomem);
2381 }
2382 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2383 end = start + section->size - 1; /* subpage_register takes an inclusive end */
2384 subpage_register(subpage, start, end, phys_section_add(section));
2385 }
2386
2387
2388 static void register_multipage(MemoryRegionSection *section)
2389 {
2390 target_phys_addr_t start_addr = section->offset_within_address_space;
2391 ram_addr_t size = section->size;
2392 target_phys_addr_t addr;
2393 uint16_t section_index = phys_section_add(section);
2394
2395 assert(size);
2396
2397 addr = start_addr;
2398 phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2399 section_index);
2400 }
2401
2402 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2403 bool readonly)
2404 {
2405 MemoryRegionSection now = *section, remain = *section;
2406
2407 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2408 || (now.size < TARGET_PAGE_SIZE)) {
2409 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2410 - now.offset_within_address_space,
2411 now.size);
2412 register_subpage(&now);
2413 remain.size -= now.size;
2414 remain.offset_within_address_space += now.size;
2415 remain.offset_within_region += now.size;
2416 }
2417 now = remain;
2418 now.size &= TARGET_PAGE_MASK;
2419 if (now.size) {
2420 register_multipage(&now);
2421 remain.size -= now.size;
2422 remain.offset_within_address_space += now.size;
2423 remain.offset_within_region += now.size;
2424 }
2425 now = remain;
2426 if (now.size) {
2427 register_subpage(&now);
2428 }
2429 }
2430
2431
2432 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2433 {
2434 if (kvm_enabled())
2435 kvm_coalesce_mmio_region(addr, size);
2436 }
2437
2438 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2439 {
2440 if (kvm_enabled())
2441 kvm_uncoalesce_mmio_region(addr, size);
2442 }
2443
2444 void qemu_flush_coalesced_mmio_buffer(void)
2445 {
2446 if (kvm_enabled())
2447 kvm_flush_coalesced_mmio_buffer();
2448 }
2449
2450 #if defined(__linux__) && !defined(TARGET_S390X)
2451
2452 #include <sys/vfs.h>
2453
2454 #define HUGETLBFS_MAGIC 0x958458f6
2455
2456 static long gethugepagesize(const char *path)
2457 {
2458 struct statfs fs;
2459 int ret;
2460
2461 do {
2462 ret = statfs(path, &fs);
2463 } while (ret != 0 && errno == EINTR);
2464
2465 if (ret != 0) {
2466 perror(path);
2467 return 0;
2468 }
2469
2470 if (fs.f_type != HUGETLBFS_MAGIC)
2471 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2472
2473 return fs.f_bsize;
2474 }
2475
2476 static void *file_ram_alloc(RAMBlock *block,
2477 ram_addr_t memory,
2478 const char *path)
2479 {
2480 char *filename;
2481 void *area;
2482 int fd;
2483 #ifdef MAP_POPULATE
2484 int flags;
2485 #endif
2486 unsigned long hpagesize;
2487
2488 hpagesize = gethugepagesize(path);
2489 if (!hpagesize) {
2490 return NULL;
2491 }
2492
2493 if (memory < hpagesize) {
2494 return NULL;
2495 }
2496
2497 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2498 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2499 return NULL;
2500 }
2501
2502 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2503 return NULL;
2504 }
2505
2506 fd = mkstemp(filename);
2507 if (fd < 0) {
2508 perror("unable to create backing store for hugepages");
2509 free(filename);
2510 return NULL;
2511 }
2512 unlink(filename);
2513 free(filename);
2514
2515 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2516
2517 /*
2518 * ftruncate is not supported by hugetlbfs in older
2519 * hosts, so don't bother bailing out on errors.
2520 * If anything goes wrong with it under other filesystems,
2521 * mmap will fail.
2522 */
2523 if (ftruncate(fd, memory))
2524 perror("ftruncate");
2525
2526 #ifdef MAP_POPULATE
2527 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2528 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2529 * to sidestep this quirk.
2530 */
2531 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2532 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2533 #else
2534 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2535 #endif
2536 if (area == MAP_FAILED) {
2537 perror("file_ram_alloc: can't mmap RAM pages");
2538 close(fd);
2539 return (NULL);
2540 }
2541 block->fd = fd;
2542 return area;
2543 }
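
/* Usage note (illustrative): this path is reached when QEMU is started
 * with something like "-mem-path /dev/hugepages" (optionally with
 * "-mem-prealloc"), so guest RAM is backed by hugetlbfs. The mount point
 * is only an example; any hugetlbfs mount works. */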
2544 #endif
2545
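/* Find the smallest gap between existing RAM blocks that is large enough
 * to hold a new block of 'size' bytes, and return its starting offset in
 * the ram_addr_t space (0 if the block list is empty). */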
2546 static ram_addr_t find_ram_offset(ram_addr_t size)
2547 {
2548 RAMBlock *block, *next_block;
2549 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2550
2551 if (QLIST_EMPTY(&ram_list.blocks))
2552 return 0;
2553
2554 QLIST_FOREACH(block, &ram_list.blocks, next) {
2555 ram_addr_t end, next = RAM_ADDR_MAX;
2556
2557 end = block->offset + block->length;
2558
2559 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2560 if (next_block->offset >= end) {
2561 next = MIN(next, next_block->offset);
2562 }
2563 }
2564 if (next - end >= size && next - end < mingap) {
2565 offset = end;
2566 mingap = next - end;
2567 }
2568 }
2569
2570 if (offset == RAM_ADDR_MAX) {
2571 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2572 (uint64_t)size);
2573 abort();
2574 }
2575
2576 return offset;
2577 }
2578
2579 static ram_addr_t last_ram_offset(void)
2580 {
2581 RAMBlock *block;
2582 ram_addr_t last = 0;
2583
2584 QLIST_FOREACH(block, &ram_list.blocks, next)
2585 last = MAX(last, block->offset + block->length);
2586
2587 return last;
2588 }
2589
2590 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2591 {
2592 RAMBlock *new_block, *block;
2593
2594 new_block = NULL;
2595 QLIST_FOREACH(block, &ram_list.blocks, next) {
2596 if (block->offset == addr) {
2597 new_block = block;
2598 break;
2599 }
2600 }
2601 assert(new_block);
2602 assert(!new_block->idstr[0]);
2603
2604 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2605 char *id = dev->parent_bus->info->get_dev_path(dev);
2606 if (id) {
2607 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2608 g_free(id);
2609 }
2610 }
2611 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2612
2613 QLIST_FOREACH(block, &ram_list.blocks, next) {
2614 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2615 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2616 new_block->idstr);
2617 abort();
2618 }
2619 }
2620 }
2621
2622 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2623 MemoryRegion *mr)
2624 {
2625 RAMBlock *new_block;
2626
2627 size = TARGET_PAGE_ALIGN(size);
2628 new_block = g_malloc0(sizeof(*new_block));
2629
2630 new_block->mr = mr;
2631 new_block->offset = find_ram_offset(size);
2632 if (host) {
2633 new_block->host = host;
2634 new_block->flags |= RAM_PREALLOC_MASK;
2635 } else {
2636 if (mem_path) {
2637 #if defined (__linux__) && !defined(TARGET_S390X)
2638 new_block->host = file_ram_alloc(new_block, size, mem_path);
2639 if (!new_block->host) {
2640 new_block->host = qemu_vmalloc(size);
2641 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2642 }
2643 #else
2644 fprintf(stderr, "-mem-path option unsupported\n");
2645 exit(1);
2646 #endif
2647 } else {
2648 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2649 /* S390 KVM requires the topmost vma of the RAM to be below a
2650 system-defined value, which is at least 256GB. Larger systems
2651 have larger values. We place the guest between the end of the
2652 data segment (system break) and this value, using 32GB as a base
2653 to leave enough room for the system break to grow. */
2654 new_block->host = mmap((void*)0x800000000, size,
2655 PROT_EXEC|PROT_READ|PROT_WRITE,
2656 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2657 if (new_block->host == MAP_FAILED) {
2658 fprintf(stderr, "Allocating RAM failed\n");
2659 abort();
2660 }
2661 #else
2662 if (xen_enabled()) {
2663 xen_ram_alloc(new_block->offset, size, mr);
2664 } else {
2665 new_block->host = qemu_vmalloc(size);
2666 }
2667 #endif
2668 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2669 }
2670 }
2671 new_block->length = size;
2672
2673 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2674
2675 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2676 last_ram_offset() >> TARGET_PAGE_BITS);
2677 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2678 0xff, size >> TARGET_PAGE_BITS);
2679
2680 if (kvm_enabled())
2681 kvm_setup_guest_memory(new_block->host, size);
2682
2683 return new_block->offset;
2684 }
2685
2686 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2687 {
2688 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2689 }
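
/* Illustrative sketch (not from this file): boards and devices normally
 * reach qemu_ram_alloc() through the memory API rather than calling it
 * directly, roughly:
 *
 *     MemoryRegion *ram = g_malloc(sizeof(*ram));
 *     memory_region_init_ram(ram, "example.ram", ram_size);  // ends up in qemu_ram_alloc()
 *     memory_region_add_subregion(get_system_memory(), 0, ram);
 *
 * "example.ram", ram_size and the base address 0 are placeholders.
 */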
2690
2691 void qemu_ram_free_from_ptr(ram_addr_t addr)
2692 {
2693 RAMBlock *block;
2694
2695 QLIST_FOREACH(block, &ram_list.blocks, next) {
2696 if (addr == block->offset) {
2697 QLIST_REMOVE(block, next);
2698 g_free(block);
2699 return;
2700 }
2701 }
2702 }
2703
2704 void qemu_ram_free(ram_addr_t addr)
2705 {
2706 RAMBlock *block;
2707
2708 QLIST_FOREACH(block, &ram_list.blocks, next) {
2709 if (addr == block->offset) {
2710 QLIST_REMOVE(block, next);
2711 if (block->flags & RAM_PREALLOC_MASK) {
2712 ;
2713 } else if (mem_path) {
2714 #if defined (__linux__) && !defined(TARGET_S390X)
2715 if (block->fd) {
2716 munmap(block->host, block->length);
2717 close(block->fd);
2718 } else {
2719 qemu_vfree(block->host);
2720 }
2721 #else
2722 abort();
2723 #endif
2724 } else {
2725 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2726 munmap(block->host, block->length);
2727 #else
2728 if (xen_enabled()) {
2729 xen_invalidate_map_cache_entry(block->host);
2730 } else {
2731 qemu_vfree(block->host);
2732 }
2733 #endif
2734 }
2735 g_free(block);
2736 return;
2737 }
2738 }
2739
2740 }
2741
2742 #ifndef _WIN32
2743 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2744 {
2745 RAMBlock *block;
2746 ram_addr_t offset;
2747 int flags;
2748 void *area, *vaddr;
2749
2750 QLIST_FOREACH(block, &ram_list.blocks, next) {
2751 offset = addr - block->offset;
2752 if (offset < block->length) {
2753 vaddr = block->host + offset;
2754 if (block->flags & RAM_PREALLOC_MASK) {
2755 ;
2756 } else {
2757 flags = MAP_FIXED;
2758 munmap(vaddr, length);
2759 if (mem_path) {
2760 #if defined(__linux__) && !defined(TARGET_S390X)
2761 if (block->fd) {
2762 #ifdef MAP_POPULATE
2763 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2764 MAP_PRIVATE;
2765 #else
2766 flags |= MAP_PRIVATE;
2767 #endif
2768 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2769 flags, block->fd, offset);
2770 } else {
2771 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2772 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2773 flags, -1, 0);
2774 }
2775 #else
2776 abort();
2777 #endif
2778 } else {
2779 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2780 flags |= MAP_SHARED | MAP_ANONYMOUS;
2781 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2782 flags, -1, 0);
2783 #else
2784 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2785 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2786 flags, -1, 0);
2787 #endif
2788 }
2789 if (area != vaddr) {
2790 fprintf(stderr, "Could not remap addr: "
2791 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2792 length, addr);
2793 exit(1);
2794 }
2795 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
2796 }
2797 return;
2798 }
2799 }
2800 }
2801 #endif /* !_WIN32 */
2802
2803 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2804 With the exception of the softmmu code in this file, this should
2805 only be used for local memory (e.g. video ram) that the device owns,
2806 and knows it isn't going to access beyond the end of the block.
2807
2808 It should not be used for general purpose DMA.
2809 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2810 */
2811 void *qemu_get_ram_ptr(ram_addr_t addr)
2812 {
2813 RAMBlock *block;
2814
2815 QLIST_FOREACH(block, &ram_list.blocks, next) {
2816 if (addr - block->offset < block->length) {
2817 /* Move this entry to the start of the list. */
2818 if (block != QLIST_FIRST(&ram_list.blocks)) {
2819 QLIST_REMOVE(block, next);
2820 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2821 }
2822 if (xen_enabled()) {
2823 /* We need to check whether the requested address is in RAM,
2824 * because we do not want to map the whole of guest memory in QEMU;
2825 * in that case we map only up to the end of the page.
2826 */
2827 if (block->offset == 0) {
2828 return xen_map_cache(addr, 0, 0);
2829 } else if (block->host == NULL) {
2830 block->host =
2831 xen_map_cache(block->offset, block->length, 1);
2832 }
2833 }
2834 return block->host + (addr - block->offset);
2835 }
2836 }
2837
2838 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2839 abort();
2840
2841 return NULL;
2842 }
2843
2844 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2845 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2846 */
2847 void *qemu_safe_ram_ptr(ram_addr_t addr)
2848 {
2849 RAMBlock *block;
2850
2851 QLIST_FOREACH(block, &ram_list.blocks, next) {
2852 if (addr - block->offset < block->length) {
2853 if (xen_enabled()) {
2854 /* We need to check whether the requested address is in RAM,
2855 * because we do not want to map the whole of guest memory in QEMU;
2856 * in that case we map only up to the end of the page.
2857 */
2858 if (block->offset == 0) {
2859 return xen_map_cache(addr, 0, 0);
2860 } else if (block->host == NULL) {
2861 block->host =
2862 xen_map_cache(block->offset, block->length, 1);
2863 }
2864 }
2865 return block->host + (addr - block->offset);
2866 }
2867 }
2868
2869 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2870 abort();
2871
2872 return NULL;
2873 }
2874
2875 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2876 * but takes a size argument */
2877 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2878 {
2879 if (*size == 0) {
2880 return NULL;
2881 }
2882 if (xen_enabled()) {
2883 return xen_map_cache(addr, *size, 1);
2884 } else {
2885 RAMBlock *block;
2886
2887 QLIST_FOREACH(block, &ram_list.blocks, next) {
2888 if (addr - block->offset < block->length) {
2889 if (addr - block->offset + *size > block->length)
2890 *size = block->length - addr + block->offset;
2891 return block->host + (addr - block->offset);
2892 }
2893 }
2894
2895 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2896 abort();
2897 }
2898 }
2899
2900 void qemu_put_ram_ptr(void *addr)
2901 {
2902 trace_qemu_put_ram_ptr(addr);
2903 }
2904
2905 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2906 {
2907 RAMBlock *block;
2908 uint8_t *host = ptr;
2909
2910 if (xen_enabled()) {
2911 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2912 return 0;
2913 }
2914
2915 QLIST_FOREACH(block, &ram_list.blocks, next) {
2916 /* This case can happen when the block is not mapped. */
2917 if (block->host == NULL) {
2918 continue;
2919 }
2920 if (host - block->host < block->length) {
2921 *ram_addr = block->offset + (host - block->host);
2922 return 0;
2923 }
2924 }
2925
2926 return -1;
2927 }
2928
2929 /* Some of the softmmu routines need to translate from a host pointer
2930 (typically a TLB entry) back to a ram offset. */
2931 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2932 {
2933 ram_addr_t ram_addr;
2934
2935 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2936 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2937 abort();
2938 }
2939 return ram_addr;
2940 }
2941
2942 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
2943 unsigned size)
2944 {
2945 #ifdef DEBUG_UNASSIGNED
2946 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2947 #endif
2948 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2949 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2950 #endif
2951 return 0;
2952 }
2953
2954 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
2955 uint64_t val, unsigned size)
2956 {
2957 #ifdef DEBUG_UNASSIGNED
2958 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2959 #endif
2960 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2961 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2962 #endif
2963 }
2964
2965 static const MemoryRegionOps unassigned_mem_ops = {
2966 .read = unassigned_mem_read,
2967 .write = unassigned_mem_write,
2968 .endianness = DEVICE_NATIVE_ENDIAN,
2969 };
2970
2971 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
2972 unsigned size)
2973 {
2974 abort();
2975 }
2976
2977 static void error_mem_write(void *opaque, target_phys_addr_t addr,
2978 uint64_t value, unsigned size)
2979 {
2980 abort();
2981 }
2982
2983 static const MemoryRegionOps error_mem_ops = {
2984 .read = error_mem_read,
2985 .write = error_mem_write,
2986 .endianness = DEVICE_NATIVE_ENDIAN,
2987 };
2988
2989 static const MemoryRegionOps rom_mem_ops = {
2990 .read = error_mem_read,
2991 .write = unassigned_mem_write,
2992 .endianness = DEVICE_NATIVE_ENDIAN,
2993 };
2994
2995 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
2996 uint64_t val, unsigned size)
2997 {
2998 int dirty_flags;
2999 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3000 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3001 #if !defined(CONFIG_USER_ONLY)
3002 tb_invalidate_phys_page_fast(ram_addr, size);
3003 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3004 #endif
3005 }
3006 switch (size) {
3007 case 1:
3008 stb_p(qemu_get_ram_ptr(ram_addr), val);
3009 break;
3010 case 2:
3011 stw_p(qemu_get_ram_ptr(ram_addr), val);
3012 break;
3013 case 4:
3014 stl_p(qemu_get_ram_ptr(ram_addr), val);
3015 break;
3016 default:
3017 abort();
3018 }
3019 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3020 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3021 /* we remove the notdirty callback only if the code has been
3022 flushed */
3023 if (dirty_flags == 0xff)
3024 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3025 }
3026
3027 static const MemoryRegionOps notdirty_mem_ops = {
3028 .read = error_mem_read,
3029 .write = notdirty_mem_write,
3030 .endianness = DEVICE_NATIVE_ENDIAN,
3031 };
3032
3033 /* Generate a debug exception if a watchpoint has been hit. */
3034 static void check_watchpoint(int offset, int len_mask, int flags)
3035 {
3036 CPUArchState *env = cpu_single_env;
3037 target_ulong pc, cs_base;
3038 TranslationBlock *tb;
3039 target_ulong vaddr;
3040 CPUWatchpoint *wp;
3041 int cpu_flags;
3042
3043 if (env->watchpoint_hit) {
3044 /* We re-entered the check after replacing the TB. Now raise
3045 * the debug interrupt so that it will trigger after the
3046 * current instruction. */
3047 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3048 return;
3049 }
3050 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3051 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3052 if ((vaddr == (wp->vaddr & len_mask) ||
3053 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3054 wp->flags |= BP_WATCHPOINT_HIT;
3055 if (!env->watchpoint_hit) {
3056 env->watchpoint_hit = wp;
3057 tb = tb_find_pc(env->mem_io_pc);
3058 if (!tb) {
3059 cpu_abort(env, "check_watchpoint: could not find TB for "
3060 "pc=%p", (void *)env->mem_io_pc);
3061 }
3062 cpu_restore_state(tb, env, env->mem_io_pc);
3063 tb_phys_invalidate(tb, -1);
3064 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3065 env->exception_index = EXCP_DEBUG;
3066 cpu_loop_exit(env);
3067 } else {
3068 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3069 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3070 cpu_resume_from_signal(env, NULL);
3071 }
3072 }
3073 } else {
3074 wp->flags &= ~BP_WATCHPOINT_HIT;
3075 }
3076 }
3077 }
3078
3079 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3080 so these check for a hit then pass through to the normal out-of-line
3081 phys routines. */
3082 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
3083 unsigned size)
3084 {
3085 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3086 switch (size) {
3087 case 1: return ldub_phys(addr);
3088 case 2: return lduw_phys(addr);
3089 case 4: return ldl_phys(addr);
3090 default: abort();
3091 }
3092 }
3093
3094 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
3095 uint64_t val, unsigned size)
3096 {
3097 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3098 switch (size) {
3099 case 1:
3100 stb_phys(addr, val);
3101 break;
3102 case 2:
3103 stw_phys(addr, val);
3104 break;
3105 case 4:
3106 stl_phys(addr, val);
3107 break;
3108 default: abort();
3109 }
3110 }
3111
3112 static const MemoryRegionOps watch_mem_ops = {
3113 .read = watch_mem_read,
3114 .write = watch_mem_write,
3115 .endianness = DEVICE_NATIVE_ENDIAN,
3116 };
3117
3118 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3119 unsigned len)
3120 {
3121 subpage_t *mmio = opaque;
3122 unsigned int idx = SUBPAGE_IDX(addr);
3123 MemoryRegionSection *section;
3124 #if defined(DEBUG_SUBPAGE)
3125 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3126 mmio, len, addr, idx);
3127 #endif
3128
3129 section = &phys_sections[mmio->sub_section[idx]];
3130 addr += mmio->base;
3131 addr -= section->offset_within_address_space;
3132 addr += section->offset_within_region;
3133 return io_mem_read(section->mr, addr, len);
3134 }
3135
3136 static void subpage_write(void *opaque, target_phys_addr_t addr,
3137 uint64_t value, unsigned len)
3138 {
3139 subpage_t *mmio = opaque;
3140 unsigned int idx = SUBPAGE_IDX(addr);
3141 MemoryRegionSection *section;
3142 #if defined(DEBUG_SUBPAGE)
3143 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3144 " idx %d value %"PRIx64"\n",
3145 __func__, mmio, len, addr, idx, value);
3146 #endif
3147
3148 section = &phys_sections[mmio->sub_section[idx]];
3149 addr += mmio->base;
3150 addr -= section->offset_within_address_space;
3151 addr += section->offset_within_region;
3152 io_mem_write(section->mr, addr, value, len);
3153 }
3154
3155 static const MemoryRegionOps subpage_ops = {
3156 .read = subpage_read,
3157 .write = subpage_write,
3158 .endianness = DEVICE_NATIVE_ENDIAN,
3159 };
3160
3161 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3162 unsigned size)
3163 {
3164 ram_addr_t raddr = addr;
3165 void *ptr = qemu_get_ram_ptr(raddr);
3166 switch (size) {
3167 case 1: return ldub_p(ptr);
3168 case 2: return lduw_p(ptr);
3169 case 4: return ldl_p(ptr);
3170 default: abort();
3171 }
3172 }
3173
3174 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3175 uint64_t value, unsigned size)
3176 {
3177 ram_addr_t raddr = addr;
3178 void *ptr = qemu_get_ram_ptr(raddr);
3179 switch (size) {
3180 case 1: return stb_p(ptr, value);
3181 case 2: return stw_p(ptr, value);
3182 case 4: return stl_p(ptr, value);
3183 default: abort();
3184 }
3185 }
3186
3187 static const MemoryRegionOps subpage_ram_ops = {
3188 .read = subpage_ram_read,
3189 .write = subpage_ram_write,
3190 .endianness = DEVICE_NATIVE_ENDIAN,
3191 };
3192
3193 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3194 uint16_t section)
3195 {
3196 int idx, eidx;
3197
3198 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3199 return -1;
3200 idx = SUBPAGE_IDX(start);
3201 eidx = SUBPAGE_IDX(end);
3202 #if defined(DEBUG_SUBPAGE)
3203 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
3204 __func__, mmio, start, end, idx, eidx, section);
3205 #endif
3206 if (memory_region_is_ram(phys_sections[section].mr)) {
3207 MemoryRegionSection new_section = phys_sections[section];
3208 new_section.mr = &io_mem_subpage_ram;
3209 section = phys_section_add(&new_section);
3210 }
3211 for (; idx <= eidx; idx++) {
3212 mmio->sub_section[idx] = section;
3213 }
3214
3215 return 0;
3216 }
3217
3218 static subpage_t *subpage_init(target_phys_addr_t base)
3219 {
3220 subpage_t *mmio;
3221
3222 mmio = g_malloc0(sizeof(subpage_t));
3223
3224 mmio->base = base;
3225 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3226 "subpage", TARGET_PAGE_SIZE);
3227 mmio->iomem.subpage = true;
3228 #if defined(DEBUG_SUBPAGE)
3229 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
3230 mmio, base, TARGET_PAGE_SIZE);
3231 #endif
3232 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3233
3234 return mmio;
3235 }
3236
3237 static uint16_t dummy_section(MemoryRegion *mr)
3238 {
3239 MemoryRegionSection section = {
3240 .mr = mr,
3241 .offset_within_address_space = 0,
3242 .offset_within_region = 0,
3243 .size = UINT64_MAX,
3244 };
3245
3246 return phys_section_add(&section);
3247 }
3248
3249 MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3250 {
3251 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3252 }
3253
3254 static void io_mem_init(void)
3255 {
3256 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3257 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3258 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3259 "unassigned", UINT64_MAX);
3260 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3261 "notdirty", UINT64_MAX);
3262 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3263 "subpage-ram", UINT64_MAX);
3264 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3265 "watch", UINT64_MAX);
3266 }
3267
3268 static void core_begin(MemoryListener *listener)
3269 {
3270 destroy_all_mappings();
3271 phys_sections_clear();
3272 phys_map.ptr = PHYS_MAP_NODE_NIL;
3273 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3274 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3275 phys_section_rom = dummy_section(&io_mem_rom);
3276 phys_section_watch = dummy_section(&io_mem_watch);
3277 }
3278
3279 static void core_commit(MemoryListener *listener)
3280 {
3281 CPUArchState *env;
3282
3283 /* since each CPU stores ram addresses in its TLB cache, we must
3284 reset the modified entries */
3285 /* XXX: slow ! */
3286 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3287 tlb_flush(env, 1);
3288 }
3289 }
3290
3291 static void core_region_add(MemoryListener *listener,
3292 MemoryRegionSection *section)
3293 {
3294 cpu_register_physical_memory_log(section, section->readonly);
3295 }
3296
3297 static void core_region_del(MemoryListener *listener,
3298 MemoryRegionSection *section)
3299 {
3300 }
3301
3302 static void core_region_nop(MemoryListener *listener,
3303 MemoryRegionSection *section)
3304 {
3305 cpu_register_physical_memory_log(section, section->readonly);
3306 }
3307
3308 static void core_log_start(MemoryListener *listener,
3309 MemoryRegionSection *section)
3310 {
3311 }
3312
3313 static void core_log_stop(MemoryListener *listener,
3314 MemoryRegionSection *section)
3315 {
3316 }
3317
3318 static void core_log_sync(MemoryListener *listener,
3319 MemoryRegionSection *section)
3320 {
3321 }
3322
3323 static void core_log_global_start(MemoryListener *listener)
3324 {
3325 cpu_physical_memory_set_dirty_tracking(1);
3326 }
3327
3328 static void core_log_global_stop(MemoryListener *listener)
3329 {
3330 cpu_physical_memory_set_dirty_tracking(0);
3331 }
3332
3333 static void core_eventfd_add(MemoryListener *listener,
3334 MemoryRegionSection *section,
3335 bool match_data, uint64_t data, int fd)
3336 {
3337 }
3338
3339 static void core_eventfd_del(MemoryListener *listener,
3340 MemoryRegionSection *section,
3341 bool match_data, uint64_t data, int fd)
3342 {
3343 }
3344
3345 static void io_begin(MemoryListener *listener)
3346 {
3347 }
3348
3349 static void io_commit(MemoryListener *listener)
3350 {
3351 }
3352
3353 static void io_region_add(MemoryListener *listener,
3354 MemoryRegionSection *section)
3355 {
3356 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3357
3358 mrio->mr = section->mr;
3359 mrio->offset = section->offset_within_region;
3360 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3361 section->offset_within_address_space, section->size);
3362 ioport_register(&mrio->iorange);
3363 }
3364
3365 static void io_region_del(MemoryListener *listener,
3366 MemoryRegionSection *section)
3367 {
3368 isa_unassign_ioport(section->offset_within_address_space, section->size);
3369 }
3370
3371 static void io_region_nop(MemoryListener *listener,
3372 MemoryRegionSection *section)
3373 {
3374 }
3375
3376 static void io_log_start(MemoryListener *listener,
3377 MemoryRegionSection *section)
3378 {
3379 }
3380
3381 static void io_log_stop(MemoryListener *listener,
3382 MemoryRegionSection *section)
3383 {
3384 }
3385
3386 static void io_log_sync(MemoryListener *listener,
3387 MemoryRegionSection *section)
3388 {
3389 }
3390
3391 static void io_log_global_start(MemoryListener *listener)
3392 {
3393 }
3394
3395 static void io_log_global_stop(MemoryListener *listener)
3396 {
3397 }
3398
3399 static void io_eventfd_add(MemoryListener *listener,
3400 MemoryRegionSection *section,
3401 bool match_data, uint64_t data, int fd)
3402 {
3403 }
3404
3405 static void io_eventfd_del(MemoryListener *listener,
3406 MemoryRegionSection *section,
3407 bool match_data, uint64_t data, int fd)
3408 {
3409 }
3410
3411 static MemoryListener core_memory_listener = {
3412 .begin = core_begin,
3413 .commit = core_commit,
3414 .region_add = core_region_add,
3415 .region_del = core_region_del,
3416 .region_nop = core_region_nop,
3417 .log_start = core_log_start,
3418 .log_stop = core_log_stop,
3419 .log_sync = core_log_sync,
3420 .log_global_start = core_log_global_start,
3421 .log_global_stop = core_log_global_stop,
3422 .eventfd_add = core_eventfd_add,
3423 .eventfd_del = core_eventfd_del,
3424 .priority = 0,
3425 };
3426
3427 static MemoryListener io_memory_listener = {
3428 .begin = io_begin,
3429 .commit = io_commit,
3430 .region_add = io_region_add,
3431 .region_del = io_region_del,
3432 .region_nop = io_region_nop,
3433 .log_start = io_log_start,
3434 .log_stop = io_log_stop,
3435 .log_sync = io_log_sync,
3436 .log_global_start = io_log_global_start,
3437 .log_global_stop = io_log_global_stop,
3438 .eventfd_add = io_eventfd_add,
3439 .eventfd_del = io_eventfd_del,
3440 .priority = 0,
3441 };
3442
3443 static void memory_map_init(void)
3444 {
3445 system_memory = g_malloc(sizeof(*system_memory));
3446 memory_region_init(system_memory, "system", INT64_MAX);
3447 set_system_memory_map(system_memory);
3448
3449 system_io = g_malloc(sizeof(*system_io));
3450 memory_region_init(system_io, "io", 65536);
3451 set_system_io_map(system_io);
3452
3453 memory_listener_register(&core_memory_listener, system_memory);
3454 memory_listener_register(&io_memory_listener, system_io);
3455 }
3456
3457 MemoryRegion *get_system_memory(void)
3458 {
3459 return system_memory;
3460 }
3461
3462 MemoryRegion *get_system_io(void)
3463 {
3464 return system_io;
3465 }
3466
3467 #endif /* !defined(CONFIG_USER_ONLY) */
3468
3469 /* physical memory access (slow version, mainly for debug) */
3470 #if defined(CONFIG_USER_ONLY)
3471 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3472 uint8_t *buf, int len, int is_write)
3473 {
3474 int l, flags;
3475 target_ulong page;
3476 void * p;
3477
3478 while (len > 0) {
3479 page = addr & TARGET_PAGE_MASK;
3480 l = (page + TARGET_PAGE_SIZE) - addr;
3481 if (l > len)
3482 l = len;
3483 flags = page_get_flags(page);
3484 if (!(flags & PAGE_VALID))
3485 return -1;
3486 if (is_write) {
3487 if (!(flags & PAGE_WRITE))
3488 return -1;
3489 /* XXX: this code should not depend on lock_user */
3490 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3491 return -1;
3492 memcpy(p, buf, l);
3493 unlock_user(p, addr, l);
3494 } else {
3495 if (!(flags & PAGE_READ))
3496 return -1;
3497 /* XXX: this code should not depend on lock_user */
3498 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3499 return -1;
3500 memcpy(buf, p, l);
3501 unlock_user(p, addr, 0);
3502 }
3503 len -= l;
3504 buf += l;
3505 addr += l;
3506 }
3507 return 0;
3508 }
3509
3510 #else
3511 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3512 int len, int is_write)
3513 {
3514 int l;
3515 uint8_t *ptr;
3516 uint32_t val;
3517 target_phys_addr_t page;
3518 MemoryRegionSection *section;
3519
3520 while (len > 0) {
3521 page = addr & TARGET_PAGE_MASK;
3522 l = (page + TARGET_PAGE_SIZE) - addr;
3523 if (l > len)
3524 l = len;
3525 section = phys_page_find(page >> TARGET_PAGE_BITS);
3526
3527 if (is_write) {
3528 if (!memory_region_is_ram(section->mr)) {
3529 target_phys_addr_t addr1;
3530 addr1 = memory_region_section_addr(section, addr);
3531 /* XXX: could force cpu_single_env to NULL to avoid
3532 potential bugs */
3533 if (l >= 4 && ((addr1 & 3) == 0)) {
3534 /* 32 bit write access */
3535 val = ldl_p(buf);
3536 io_mem_write(section->mr, addr1, val, 4);
3537 l = 4;
3538 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3539 /* 16 bit write access */
3540 val = lduw_p(buf);
3541 io_mem_write(section->mr, addr1, val, 2);
3542 l = 2;
3543 } else {
3544 /* 8 bit write access */
3545 val = ldub_p(buf);
3546 io_mem_write(section->mr, addr1, val, 1);
3547 l = 1;
3548 }
3549 } else if (!section->readonly) {
3550 ram_addr_t addr1;
3551 addr1 = memory_region_get_ram_addr(section->mr)
3552 + memory_region_section_addr(section, addr);
3553 /* RAM case */
3554 ptr = qemu_get_ram_ptr(addr1);
3555 memcpy(ptr, buf, l);
3556 if (!cpu_physical_memory_is_dirty(addr1)) {
3557 /* invalidate code */
3558 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3559 /* set dirty bit */
3560 cpu_physical_memory_set_dirty_flags(
3561 addr1, (0xff & ~CODE_DIRTY_FLAG));
3562 }
3563 qemu_put_ram_ptr(ptr);
3564 }
3565 } else {
3566 if (!(memory_region_is_ram(section->mr) ||
3567 memory_region_is_romd(section->mr))) {
3568 target_phys_addr_t addr1;
3569 /* I/O case */
3570 addr1 = memory_region_section_addr(section, addr);
3571 if (l >= 4 && ((addr1 & 3) == 0)) {
3572 /* 32 bit read access */
3573 val = io_mem_read(section->mr, addr1, 4);
3574 stl_p(buf, val);
3575 l = 4;
3576 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3577 /* 16 bit read access */
3578 val = io_mem_read(section->mr, addr1, 2);
3579 stw_p(buf, val);
3580 l = 2;
3581 } else {
3582 /* 8 bit read access */
3583 val = io_mem_read(section->mr, addr1, 1);
3584 stb_p(buf, val);
3585 l = 1;
3586 }
3587 } else {
3588 /* RAM case */
3589 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3590 + memory_region_section_addr(section,
3591 addr));
3592 memcpy(buf, ptr, l);
3593 qemu_put_ram_ptr(ptr);
3594 }
3595 }
3596 len -= l;
3597 buf += l;
3598 addr += l;
3599 }
3600 }
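
/* Illustrative sketch: most callers use the cpu_physical_memory_read()/
 * cpu_physical_memory_write() wrappers (used elsewhere in this file for
 * the bounce buffer) rather than calling cpu_physical_memory_rw() directly:
 *
 *     uint8_t buf[64];
 *     cpu_physical_memory_read(gpa, buf, sizeof(buf));    // is_write = 0
 *     cpu_physical_memory_write(gpa, buf, sizeof(buf));   // is_write = 1
 *
 * gpa is a placeholder guest-physical address.
 */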
3601
3602 /* used for ROM loading : can write in RAM and ROM */
3603 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3604 const uint8_t *buf, int len)
3605 {
3606 int l;
3607 uint8_t *ptr;
3608 target_phys_addr_t page;
3609 MemoryRegionSection *section;
3610
3611 while (len > 0) {
3612 page = addr & TARGET_PAGE_MASK;
3613 l = (page + TARGET_PAGE_SIZE) - addr;
3614 if (l > len)
3615 l = len;
3616 section = phys_page_find(page >> TARGET_PAGE_BITS);
3617
3618 if (!(memory_region_is_ram(section->mr) ||
3619 memory_region_is_romd(section->mr))) {
3620 /* do nothing */
3621 } else {
3622 unsigned long addr1;
3623 addr1 = memory_region_get_ram_addr(section->mr)
3624 + memory_region_section_addr(section, addr);
3625 /* ROM/RAM case */
3626 ptr = qemu_get_ram_ptr(addr1);
3627 memcpy(ptr, buf, l);
3628 qemu_put_ram_ptr(ptr);
3629 }
3630 len -= l;
3631 buf += l;
3632 addr += l;
3633 }
3634 }
3635
3636 typedef struct {
3637 void *buffer;
3638 target_phys_addr_t addr;
3639 target_phys_addr_t len;
3640 } BounceBuffer;
3641
3642 static BounceBuffer bounce;
3643
3644 typedef struct MapClient {
3645 void *opaque;
3646 void (*callback)(void *opaque);
3647 QLIST_ENTRY(MapClient) link;
3648 } MapClient;
3649
3650 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3651 = QLIST_HEAD_INITIALIZER(map_client_list);
3652
3653 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3654 {
3655 MapClient *client = g_malloc(sizeof(*client));
3656
3657 client->opaque = opaque;
3658 client->callback = callback;
3659 QLIST_INSERT_HEAD(&map_client_list, client, link);
3660 return client;
3661 }
3662
3663 void cpu_unregister_map_client(void *_client)
3664 {
3665 MapClient *client = (MapClient *)_client;
3666
3667 QLIST_REMOVE(client, link);
3668 g_free(client);
3669 }
3670
3671 static void cpu_notify_map_clients(void)
3672 {
3673 MapClient *client;
3674
3675 while (!QLIST_EMPTY(&map_client_list)) {
3676 client = QLIST_FIRST(&map_client_list);
3677 client->callback(client->opaque);
3678 cpu_unregister_map_client(client);
3679 }
3680 }
3681
3682 /* Map a physical memory region into a host virtual address.
3683 * May map a subset of the requested range, given by and returned in *plen.
3684 * May return NULL if resources needed to perform the mapping are exhausted.
3685 * Use only for reads OR writes - not for read-modify-write operations.
3686 * Use cpu_register_map_client() to know when retrying the map operation is
3687 * likely to succeed.
3688 */
3689 void *cpu_physical_memory_map(target_phys_addr_t addr,
3690 target_phys_addr_t *plen,
3691 int is_write)
3692 {
3693 target_phys_addr_t len = *plen;
3694 target_phys_addr_t todo = 0;
3695 int l;
3696 target_phys_addr_t page;
3697 MemoryRegionSection *section;
3698 ram_addr_t raddr = RAM_ADDR_MAX;
3699 ram_addr_t rlen;
3700 void *ret;
3701
3702 while (len > 0) {
3703 page = addr & TARGET_PAGE_MASK;
3704 l = (page + TARGET_PAGE_SIZE) - addr;
3705 if (l > len)
3706 l = len;
3707 section = phys_page_find(page >> TARGET_PAGE_BITS);
3708
3709 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3710 if (todo || bounce.buffer) {
3711 break;
3712 }
3713 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3714 bounce.addr = addr;
3715 bounce.len = l;
3716 if (!is_write) {
3717 cpu_physical_memory_read(addr, bounce.buffer, l);
3718 }
3719
3720 *plen = l;
3721 return bounce.buffer;
3722 }
3723 if (!todo) {
3724 raddr = memory_region_get_ram_addr(section->mr)
3725 + memory_region_section_addr(section, addr);
3726 }
3727
3728 len -= l;
3729 addr += l;
3730 todo += l;
3731 }
3732 rlen = todo;
3733 ret = qemu_ram_ptr_length(raddr, &rlen);
3734 *plen = rlen;
3735 return ret;
3736 }
3737
3738 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3739 * Will also mark the memory as dirty if is_write == 1. access_len gives
3740 * the amount of memory that was actually read or written by the caller.
3741 */
3742 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3743 int is_write, target_phys_addr_t access_len)
3744 {
3745 if (buffer != bounce.buffer) {
3746 if (is_write) {
3747 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3748 while (access_len) {
3749 unsigned l;
3750 l = TARGET_PAGE_SIZE;
3751 if (l > access_len)
3752 l = access_len;
3753 if (!cpu_physical_memory_is_dirty(addr1)) {
3754 /* invalidate code */
3755 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3756 /* set dirty bit */
3757 cpu_physical_memory_set_dirty_flags(
3758 addr1, (0xff & ~CODE_DIRTY_FLAG));
3759 }
3760 addr1 += l;
3761 access_len -= l;
3762 }
3763 }
3764 if (xen_enabled()) {
3765 xen_invalidate_map_cache_entry(buffer);
3766 }
3767 return;
3768 }
3769 if (is_write) {
3770 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3771 }
3772 qemu_vfree(bounce.buffer);
3773 bounce.buffer = NULL;
3774 cpu_notify_map_clients();
3775 }
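
/* Illustrative sketch: a DMA-capable device model would typically pair
 * map and unmap like this (gpa and size are placeholders):
 *
 *     target_phys_addr_t len = size;
 *     void *p = cpu_physical_memory_map(gpa, &len, 1);    // 1 = for writing
 *     if (p) {
 *         // fill at most 'len' bytes at p ...
 *         cpu_physical_memory_unmap(p, len, 1, len);
 *     } else {
 *         // resources exhausted: register a map client and retry later
 *     }
 */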
3776
3777 /* warning: addr must be aligned */
3778 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
3779 enum device_endian endian)
3780 {
3781 uint8_t *ptr;
3782 uint32_t val;
3783 MemoryRegionSection *section;
3784
3785 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3786
3787 if (!(memory_region_is_ram(section->mr) ||
3788 memory_region_is_romd(section->mr))) {
3789 /* I/O case */
3790 addr = memory_region_section_addr(section, addr);
3791 val = io_mem_read(section->mr, addr, 4);
3792 #if defined(TARGET_WORDS_BIGENDIAN)
3793 if (endian == DEVICE_LITTLE_ENDIAN) {
3794 val = bswap32(val);
3795 }
3796 #else
3797 if (endian == DEVICE_BIG_ENDIAN) {
3798 val = bswap32(val);
3799 }
3800 #endif
3801 } else {
3802 /* RAM case */
3803 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3804 & TARGET_PAGE_MASK)
3805 + memory_region_section_addr(section, addr));
3806 switch (endian) {
3807 case DEVICE_LITTLE_ENDIAN:
3808 val = ldl_le_p(ptr);
3809 break;
3810 case DEVICE_BIG_ENDIAN:
3811 val = ldl_be_p(ptr);
3812 break;
3813 default:
3814 val = ldl_p(ptr);
3815 break;
3816 }
3817 }
3818 return val;
3819 }
3820
3821 uint32_t ldl_phys(target_phys_addr_t addr)
3822 {
3823 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3824 }
3825
3826 uint32_t ldl_le_phys(target_phys_addr_t addr)
3827 {
3828 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3829 }
3830
3831 uint32_t ldl_be_phys(target_phys_addr_t addr)
3832 {
3833 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3834 }
3835
3836 /* warning: addr must be aligned */
3837 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
3838 enum device_endian endian)
3839 {
3840 uint8_t *ptr;
3841 uint64_t val;
3842 MemoryRegionSection *section;
3843
3844 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3845
3846 if (!(memory_region_is_ram(section->mr) ||
3847 memory_region_is_romd(section->mr))) {
3848 /* I/O case */
3849 addr = memory_region_section_addr(section, addr);
3850
3851 /* XXX This is broken when device endian != cpu endian.
3852 Fix and add "endian" variable check */
3853 #ifdef TARGET_WORDS_BIGENDIAN
3854 val = io_mem_read(section->mr, addr, 4) << 32;
3855 val |= io_mem_read(section->mr, addr + 4, 4);
3856 #else
3857 val = io_mem_read(section->mr, addr, 4);
3858 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3859 #endif
3860 } else {
3861 /* RAM case */
3862 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3863 & TARGET_PAGE_MASK)
3864 + memory_region_section_addr(section, addr));
3865 switch (endian) {
3866 case DEVICE_LITTLE_ENDIAN:
3867 val = ldq_le_p(ptr);
3868 break;
3869 case DEVICE_BIG_ENDIAN:
3870 val = ldq_be_p(ptr);
3871 break;
3872 default:
3873 val = ldq_p(ptr);
3874 break;
3875 }
3876 }
3877 return val;
3878 }
3879
3880 uint64_t ldq_phys(target_phys_addr_t addr)
3881 {
3882 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3883 }
3884
3885 uint64_t ldq_le_phys(target_phys_addr_t addr)
3886 {
3887 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3888 }
3889
3890 uint64_t ldq_be_phys(target_phys_addr_t addr)
3891 {
3892 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3893 }
3894
3895 /* XXX: optimize */
3896 uint32_t ldub_phys(target_phys_addr_t addr)
3897 {
3898 uint8_t val;
3899 cpu_physical_memory_read(addr, &val, 1);
3900 return val;
3901 }
3902
3903 /* warning: addr must be aligned */
3904 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
3905 enum device_endian endian)
3906 {
3907 uint8_t *ptr;
3908 uint64_t val;
3909 MemoryRegionSection *section;
3910
3911 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3912
3913 if (!(memory_region_is_ram(section->mr) ||
3914 memory_region_is_romd(section->mr))) {
3915 /* I/O case */
3916 addr = memory_region_section_addr(section, addr);
3917 val = io_mem_read(section->mr, addr, 2);
3918 #if defined(TARGET_WORDS_BIGENDIAN)
3919 if (endian == DEVICE_LITTLE_ENDIAN) {
3920 val = bswap16(val);
3921 }
3922 #else
3923 if (endian == DEVICE_BIG_ENDIAN) {
3924 val = bswap16(val);
3925 }
3926 #endif
3927 } else {
3928 /* RAM case */
3929 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3930 & TARGET_PAGE_MASK)
3931 + memory_region_section_addr(section, addr));
3932 switch (endian) {
3933 case DEVICE_LITTLE_ENDIAN:
3934 val = lduw_le_p(ptr);
3935 break;
3936 case DEVICE_BIG_ENDIAN:
3937 val = lduw_be_p(ptr);
3938 break;
3939 default:
3940 val = lduw_p(ptr);
3941 break;
3942 }
3943 }
3944 return val;
3945 }
3946
3947 uint32_t lduw_phys(target_phys_addr_t addr)
3948 {
3949 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3950 }
3951
3952 uint32_t lduw_le_phys(target_phys_addr_t addr)
3953 {
3954 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3955 }
3956
3957 uint32_t lduw_be_phys(target_phys_addr_t addr)
3958 {
3959 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3960 }
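
/* Illustrative sketch: device models use these accessors to read and
 * write guest-physical memory with an explicit endianness, e.g. for a
 * little-endian descriptor ring (desc_addr and the field offsets are
 * placeholders):
 *
 *     uint32_t flags = ldl_le_phys(desc_addr + 8);
 *     stl_le_phys(desc_addr + 12, status);
 */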
3961
3962 /* warning: addr must be aligned. The ram page is not marked as dirty
3963 and the code inside is not invalidated. It is useful if the dirty
3964 bits are used to track modified PTEs */
3965 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3966 {
3967 uint8_t *ptr;
3968 MemoryRegionSection *section;
3969
3970 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3971
3972 if (!memory_region_is_ram(section->mr) || section->readonly) {
3973 addr = memory_region_section_addr(section, addr);
3974 if (memory_region_is_ram(section->mr)) {
3975 section = &phys_sections[phys_section_rom];
3976 }
3977 io_mem_write(section->mr, addr, val, 4);
3978 } else {
3979 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3980 & TARGET_PAGE_MASK)
3981 + memory_region_section_addr(section, addr);
3982 ptr = qemu_get_ram_ptr(addr1);
3983 stl_p(ptr, val);
3984
3985 if (unlikely(in_migration)) {
3986 if (!cpu_physical_memory_is_dirty(addr1)) {
3987 /* invalidate code */
3988 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3989 /* set dirty bit */
3990 cpu_physical_memory_set_dirty_flags(
3991 addr1, (0xff & ~CODE_DIRTY_FLAG));
3992 }
3993 }
3994 }
3995 }
3996
3997 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
3998 {
3999 uint8_t *ptr;
4000 MemoryRegionSection *section;
4001
4002 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4003
4004 if (!memory_region_is_ram(section->mr) || section->readonly) {
4005 addr = memory_region_section_addr(section, addr);
4006 if (memory_region_is_ram(section->mr)) {
4007 section = &phys_sections[phys_section_rom];
4008 }
4009 #ifdef TARGET_WORDS_BIGENDIAN
4010 io_mem_write(section->mr, addr, val >> 32, 4);
4011 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
4012 #else
4013 io_mem_write(section->mr, addr, (uint32_t)val, 4);
4014 io_mem_write(section->mr, addr + 4, val >> 32, 4);
4015 #endif
4016 } else {
4017 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
4018 & TARGET_PAGE_MASK)
4019 + memory_region_section_addr(section, addr));
4020 stq_p(ptr, val);
4021 }
4022 }
4023
4024 /* warning: addr must be aligned */
4025 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4026 enum device_endian endian)
4027 {
4028 uint8_t *ptr;
4029 MemoryRegionSection *section;
4030
4031 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4032
4033 if (!memory_region_is_ram(section->mr) || section->readonly) {
4034 addr = memory_region_section_addr(section, addr);
4035 if (memory_region_is_ram(section->mr)) {
4036 section = &phys_sections[phys_section_rom];
4037 }
4038 #if defined(TARGET_WORDS_BIGENDIAN)
4039 if (endian == DEVICE_LITTLE_ENDIAN) {
4040 val = bswap32(val);
4041 }
4042 #else
4043 if (endian == DEVICE_BIG_ENDIAN) {
4044 val = bswap32(val);
4045 }
4046 #endif
4047 io_mem_write(section->mr, addr, val, 4);
4048 } else {
4049 unsigned long addr1;
4050 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4051 + memory_region_section_addr(section, addr);
4052 /* RAM case */
4053 ptr = qemu_get_ram_ptr(addr1);
4054 switch (endian) {
4055 case DEVICE_LITTLE_ENDIAN:
4056 stl_le_p(ptr, val);
4057 break;
4058 case DEVICE_BIG_ENDIAN:
4059 stl_be_p(ptr, val);
4060 break;
4061 default:
4062 stl_p(ptr, val);
4063 break;
4064 }
4065 if (!cpu_physical_memory_is_dirty(addr1)) {
4066 /* invalidate code */
4067 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4068 /* set dirty bit */
4069 cpu_physical_memory_set_dirty_flags(addr1,
4070 (0xff & ~CODE_DIRTY_FLAG));
4071 }
4072 }
4073 }
4074
4075 void stl_phys(target_phys_addr_t addr, uint32_t val)
4076 {
4077 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4078 }
4079
4080 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4081 {
4082 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4083 }
4084
4085 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4086 {
4087 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4088 }
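/*
 * An illustrative sketch (guarded by "#if 0" so it is not built): unlike
 * stl_phys_notdirty() above, the plain stl_phys()/stl_le_phys()/
 * stl_be_phys() stores mark the RAM page dirty and invalidate any
 * translated code covering it, so they are the right choice when a device
 * model writes data the guest is meant to observe. The status value and
 * address below are hypothetical.
 */
#if 0
static void example_post_dma_status(target_phys_addr_t status_pa)
{
    /* Post a little-endian 32-bit completion status into guest RAM;
       dirty tracking and TB invalidation are handled by stl_le_phys(). */
    stl_le_phys(status_pa, 0x00000001);
}
#endif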
4089
4090 /* XXX: optimize */
4091 void stb_phys(target_phys_addr_t addr, uint32_t val)
4092 {
4093 uint8_t v = val;
4094 cpu_physical_memory_write(addr, &v, 1);
4095 }
4096
4097 /* warning: addr must be aligned */
4098 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4099 enum device_endian endian)
4100 {
4101 uint8_t *ptr;
4102 MemoryRegionSection *section;
4103
4104 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4105
4106 if (!memory_region_is_ram(section->mr) || section->readonly) {
4107 addr = memory_region_section_addr(section, addr);
4108 if (memory_region_is_ram(section->mr)) {
4109 section = &phys_sections[phys_section_rom];
4110 }
4111 #if defined(TARGET_WORDS_BIGENDIAN)
4112 if (endian == DEVICE_LITTLE_ENDIAN) {
4113 val = bswap16(val);
4114 }
4115 #else
4116 if (endian == DEVICE_BIG_ENDIAN) {
4117 val = bswap16(val);
4118 }
4119 #endif
4120 io_mem_write(section->mr, addr, val, 2);
4121 } else {
4122 unsigned long addr1;
4123 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4124 + memory_region_section_addr(section, addr);
4125 /* RAM case */
4126 ptr = qemu_get_ram_ptr(addr1);
4127 switch (endian) {
4128 case DEVICE_LITTLE_ENDIAN:
4129 stw_le_p(ptr, val);
4130 break;
4131 case DEVICE_BIG_ENDIAN:
4132 stw_be_p(ptr, val);
4133 break;
4134 default:
4135 stw_p(ptr, val);
4136 break;
4137 }
4138 if (!cpu_physical_memory_is_dirty(addr1)) {
4139 /* invalidate code */
4140 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4141 /* set dirty bit */
4142 cpu_physical_memory_set_dirty_flags(addr1,
4143 (0xff & ~CODE_DIRTY_FLAG));
4144 }
4145 }
4146 }
4147
4148 void stw_phys(target_phys_addr_t addr, uint32_t val)
4149 {
4150 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4151 }
4152
4153 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4154 {
4155 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4156 }
4157
4158 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4159 {
4160 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4161 }
4162
4163 /* XXX: optimize */
4164 void stq_phys(target_phys_addr_t addr, uint64_t val)
4165 {
4166 val = tswap64(val);
4167 cpu_physical_memory_write(addr, &val, 8);
4168 }
4169
4170 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4171 {
4172 val = cpu_to_le64(val);
4173 cpu_physical_memory_write(addr, &val, 8);
4174 }
4175
4176 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4177 {
4178 val = cpu_to_be64(val);
4179 cpu_physical_memory_write(addr, &val, 8);
4180 }
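/*
 * An illustrative sketch (guarded by "#if 0" so it is not built): the
 * 64-bit store helpers above are byte-swap-and-write wrappers around
 * cpu_physical_memory_write(), so a value stored into guest RAM with
 * stq_le_phys() reads back unchanged through ldq_le_phys() on any
 * host/target byte-order combination. The ring address is hypothetical.
 */
#if 0
static void example_store_ring_base(target_phys_addr_t ring_pa, uint64_t base)
{
    uint64_t check;

    stq_le_phys(ring_pa, base);
    /* Reading back with the matching-endian helper yields 'base' again. */
    check = ldq_le_phys(ring_pa);
    (void)check;
}
#endif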
4181
4182 /* virtual memory access for debug (includes writing to ROM) */
4183 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4184 uint8_t *buf, int len, int is_write)
4185 {
4186 int l;
4187 target_phys_addr_t phys_addr;
4188 target_ulong page;
4189
4190 while (len > 0) {
4191 page = addr & TARGET_PAGE_MASK;
4192 phys_addr = cpu_get_phys_page_debug(env, page);
4193 /* if no physical page mapped, return an error */
4194 if (phys_addr == -1)
4195 return -1;
4196 l = (page + TARGET_PAGE_SIZE) - addr;
4197 if (l > len)
4198 l = len;
4199 phys_addr += (addr & ~TARGET_PAGE_MASK);
4200 if (is_write)
4201 cpu_physical_memory_write_rom(phys_addr, buf, l);
4202 else
4203 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4204 len -= l;
4205 buf += l;
4206 addr += l;
4207 }
4208 return 0;
4209 }
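/*
 * An illustrative sketch (guarded by "#if 0" so it is not built):
 * cpu_memory_rw_debug() is the accessor used by debugger-style callers
 * such as the gdb stub and the monitor's memory commands. It translates
 * guest-virtual addresses page by page via cpu_get_phys_page_debug() and,
 * for writes, goes through cpu_physical_memory_write_rom() so that even
 * ROM can be patched (e.g. for software breakpoints).
 */
#if 0
static int example_peek_guest_virtual(CPUArchState *env, target_ulong va,
                                      uint8_t *buf, int len)
{
    /* Returns 0 on success, -1 if any page in the range is unmapped. */
    return cpu_memory_rw_debug(env, va, buf, len, 0);
}
#endif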
4210 #endif
4211
4212 /* In deterministic execution mode, instructions that perform device I/O
4213 must be at the end of the TB. */
4214 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4215 {
4216 TranslationBlock *tb;
4217 uint32_t n, cflags;
4218 target_ulong pc, cs_base;
4219 uint64_t flags;
4220
4221 tb = tb_find_pc(retaddr);
4222 if (!tb) {
4223 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4224 (void *)retaddr);
4225 }
4226 n = env->icount_decr.u16.low + tb->icount;
4227 cpu_restore_state(tb, env, retaddr);
4228 /* Calculate how many instructions had been executed before the fault
4229 occurred. */
4230 n = n - env->icount_decr.u16.low;
4231 /* Generate a new TB ending on the I/O insn. */
4232 n++;
4233 /* On MIPS and SH, delay-slot instructions can only be restarted if
4234 they were already the first instruction in the TB. If this is not
4235 the first instruction in the TB, re-execute the preceding
4236 branch. */
4237 #if defined(TARGET_MIPS)
4238 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4239 env->active_tc.PC -= 4;
4240 env->icount_decr.u16.low++;
4241 env->hflags &= ~MIPS_HFLAG_BMASK;
4242 }
4243 #elif defined(TARGET_SH4)
4244 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4245 && n > 1) {
4246 env->pc -= 2;
4247 env->icount_decr.u16.low++;
4248 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4249 }
4250 #endif
4251 /* This should never happen. */
4252 if (n > CF_COUNT_MASK)
4253 cpu_abort(env, "TB too big during recompile");
4254
4255 cflags = n | CF_LAST_IO;
4256 pc = tb->pc;
4257 cs_base = tb->cs_base;
4258 flags = tb->flags;
4259 tb_phys_invalidate(tb, -1);
4260 /* FIXME: In theory this could raise an exception. In practice
4261 we have already translated the block once so it's probably ok. */
4262 tb_gen_code(env, pc, cs_base, flags, cflags);
4263 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4264 the first in the TB) then we end up generating a whole new TB and
4265 repeating the fault, which is horribly inefficient.
4266 Better would be to execute just this insn uncached, or generate a
4267 second new TB. */
4268 cpu_resume_from_signal(env, NULL);
4269 }
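/*
 * An illustrative, deliberately simplified sketch (guarded by "#if 0" so
 * it is not built): cpu_io_recompile() is not called by device code but
 * by the softmmu memory helpers when, in icount (deterministic) mode, an
 * MMIO access turns out not to be the last instruction of the current TB.
 * The helper name below is hypothetical and the real check in the softmmu
 * code is more involved; this only shows the shape of the call, assuming
 * the can_do_io() predicate from exec-all.h.
 */
#if 0
static uint64_t example_icount_mmio_read(CPUArchState *env, MemoryRegion *mr,
                                         target_phys_addr_t addr,
                                         unsigned size, uintptr_t retaddr)
{
    /* can_do_io() is false only in icount mode, roughly when the current
       insn is not the last one of its TB. */
    if (!can_do_io(env)) {
        /* Regenerate the TB so the access is retried as the last
           instruction of a new TB, keeping the instruction count exact. */
        cpu_io_recompile(env, retaddr);
    }
    return io_mem_read(mr, addr, size);
}
#endif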
4270
4271 #if !defined(CONFIG_USER_ONLY)
4272
4273 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4274 {
4275 int i, target_code_size, max_target_code_size;
4276 int direct_jmp_count, direct_jmp2_count, cross_page;
4277 TranslationBlock *tb;
4278
4279 target_code_size = 0;
4280 max_target_code_size = 0;
4281 cross_page = 0;
4282 direct_jmp_count = 0;
4283 direct_jmp2_count = 0;
4284 for (i = 0; i < nb_tbs; i++) {
4285 tb = &tbs[i];
4286 target_code_size += tb->size;
4287 if (tb->size > max_target_code_size)
4288 max_target_code_size = tb->size;
4289 if (tb->page_addr[1] != -1)
4290 cross_page++;
4291 if (tb->tb_next_offset[0] != 0xffff) {
4292 direct_jmp_count++;
4293 if (tb->tb_next_offset[1] != 0xffff) {
4294 direct_jmp2_count++;
4295 }
4296 }
4297 }
4298 /* XXX: avoid using doubles? */
4299 cpu_fprintf(f, "Translation buffer state:\n");
4300 cpu_fprintf(f, "gen code size %td/%ld\n",
4301 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4302 cpu_fprintf(f, "TB count %d/%d\n",
4303 nb_tbs, code_gen_max_blocks);
4304 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4305 nb_tbs ? target_code_size / nb_tbs : 0,
4306 max_target_code_size);
4307 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4308 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4309 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4310 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4311 cross_page,
4312 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4313 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4314 direct_jmp_count,
4315 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4316 direct_jmp2_count,
4317 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4318 cpu_fprintf(f, "\nStatistics:\n");
4319 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4320 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4321 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4322 tcg_dump_info(f, cpu_fprintf);
4323 }
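/*
 * An illustrative sketch (guarded by "#if 0" so it is not built):
 * dump_exec_info() only formats and prints, taking any fprintf-like
 * callback, so the statistics can go to a monitor, a log file or plain
 * stderr. A trivial invocation:
 */
#if 0
static void example_dump_jit_stats_to_stderr(void)
{
    dump_exec_info(stderr, fprintf);
}
#endif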
4324
4325 /*
4326 * A helper function for the _utterly broken_ virtio device model to find
4327 * out if it's running on a big-endian machine. Don't do this at home, kids!
4328 */
4329 bool virtio_is_big_endian(void);
4330 bool virtio_is_big_endian(void)
4331 {
4332 #if defined(TARGET_WORDS_BIGENDIAN)
4333 return true;
4334 #else
4335 return false;
4336 #endif
4337 }
4338
4339 #endif