[qemu.git] / exec.c
1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
59
60 #include "cputlb.h"
61
62 #define WANT_EXEC_OBSOLETE
63 #include "exec-obsolete.h"
64
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_UNASSIGNED
68
69 /* make various TB consistency checks */
70 //#define DEBUG_TB_CHECK
71
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
74
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
79
80 #define SMC_BITMAP_USE_THRESHOLD 10
81
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88
89 #if defined(__arm__) || defined(__sparc_v9__)
90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
91 have limited branch ranges (possibly also PPC) so place it in a
92    section close to the code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32) && !defined(_WIN64)
97 #define code_gen_section \
98 __attribute__((aligned (16)))
99 #else
100 #define code_gen_section \
101 __attribute__((aligned (32)))
102 #endif
103
104 uint8_t code_gen_prologue[1024] code_gen_section;
105 static uint8_t *code_gen_buffer;
106 static unsigned long code_gen_buffer_size;
107 /* threshold to flush the translated code buffer */
108 static unsigned long code_gen_buffer_max_size;
109 static uint8_t *code_gen_ptr;
110
111 #if !defined(CONFIG_USER_ONLY)
112 int phys_ram_fd;
113 static int in_migration;
114
115 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
116
117 static MemoryRegion *system_memory;
118 static MemoryRegion *system_io;
119
120 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
121 static MemoryRegion io_mem_subpage_ram;
122
123 #endif
124
125 CPUArchState *first_cpu;
126 /* current CPU in the current thread. It is only valid inside
127 cpu_exec() */
128 DEFINE_TLS(CPUArchState *,cpu_single_env);
129 /* 0 = Do not count executed instructions.
130 1 = Precise instruction counting.
131 2 = Adaptive rate instruction counting. */
132 int use_icount = 0;
133
134 typedef struct PageDesc {
135 /* list of TBs intersecting this ram page */
136 TranslationBlock *first_tb;
137     /* to optimize handling of self-modifying code, we count the write
138        accesses to a given page and switch to a bitmap once a threshold is reached */
139 unsigned int code_write_count;
140 uint8_t *code_bitmap;
141 #if defined(CONFIG_USER_ONLY)
142 unsigned long flags;
143 #endif
144 } PageDesc;
145
146 /* In system mode we want L1_MAP to be based on ram offsets,
147 while in user mode we want it to be based on virtual addresses. */
148 #if !defined(CONFIG_USER_ONLY)
149 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
150 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
151 #else
152 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
153 #endif
154 #else
155 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
156 #endif
157
158 /* Size of the L2 (and L3, etc) page tables. */
159 #define L2_BITS 10
160 #define L2_SIZE (1 << L2_BITS)
161
162 #define P_L2_LEVELS \
163 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
164
165 /* The bits remaining after N lower levels of page tables. */
166 #define V_L1_BITS_REM \
167 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
168
169 #if V_L1_BITS_REM < 4
170 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
171 #else
172 #define V_L1_BITS V_L1_BITS_REM
173 #endif
174
175 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
176
177 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
178
179 uintptr_t qemu_real_host_page_size;
180 uintptr_t qemu_host_page_size;
181 uintptr_t qemu_host_page_mask;
182
183 /* This is a multi-level map on the virtual address space.
184 The bottom level has pointers to PageDesc. */
185 static void *l1_map[V_L1_SIZE];
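
/* For illustration (not from the original file): with 4 KiB target pages
   (TARGET_PAGE_BITS == 12) and L1_MAP_ADDR_SPACE_BITS == 32, we get
   V_L1_BITS_REM = (32 - 12) % 10 = 0, hence V_L1_BITS = 10 and
   V_L1_SHIFT = 10.  l1_map then has 1024 entries, each pointing directly
   to an array of 1024 PageDescs, so 1024 * 1024 pages * 4 KiB = 4 GiB of
   address space are covered with just two levels. */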
186
187 #if !defined(CONFIG_USER_ONLY)
188 typedef struct PhysPageEntry PhysPageEntry;
189
190 static MemoryRegionSection *phys_sections;
191 static unsigned phys_sections_nb, phys_sections_nb_alloc;
192 static uint16_t phys_section_unassigned;
193 static uint16_t phys_section_notdirty;
194 static uint16_t phys_section_rom;
195 static uint16_t phys_section_watch;
196
197 struct PhysPageEntry {
198 uint16_t is_leaf : 1;
199 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
200 uint16_t ptr : 15;
201 };
202
203 /* Simple allocator for PhysPageEntry nodes */
204 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
205 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
206
207 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
208
209 /* This is a multi-level map on the physical address space.
210 The bottom level has pointers to MemoryRegionSections. */
211 static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
212
213 static void io_mem_init(void);
214 static void memory_map_init(void);
215
216 static MemoryRegion io_mem_watch;
217 #endif
218
219 /* log support */
220 #ifdef WIN32
221 static const char *logfilename = "qemu.log";
222 #else
223 static const char *logfilename = "/tmp/qemu.log";
224 #endif
225 FILE *logfile;
226 int loglevel;
227 static int log_append = 0;
228
229 /* statistics */
230 static int tb_flush_count;
231 static int tb_phys_invalidate_count;
232
233 #ifdef _WIN32
234 static void map_exec(void *addr, long size)
235 {
236 DWORD old_protect;
237 VirtualProtect(addr, size,
238 PAGE_EXECUTE_READWRITE, &old_protect);
239
240 }
241 #else
242 static void map_exec(void *addr, long size)
243 {
244 unsigned long start, end, page_size;
245
246 page_size = getpagesize();
247 start = (unsigned long)addr;
248 start &= ~(page_size - 1);
249
250 end = (unsigned long)addr + size;
251 end += page_size - 1;
252 end &= ~(page_size - 1);
253
254 mprotect((void *)start, end - start,
255 PROT_READ | PROT_WRITE | PROT_EXEC);
256 }
257 #endif
258
259 static void page_init(void)
260 {
261 /* NOTE: we can always suppose that qemu_host_page_size >=
262 TARGET_PAGE_SIZE */
263 #ifdef _WIN32
264 {
265 SYSTEM_INFO system_info;
266
267 GetSystemInfo(&system_info);
268 qemu_real_host_page_size = system_info.dwPageSize;
269 }
270 #else
271 qemu_real_host_page_size = getpagesize();
272 #endif
273 if (qemu_host_page_size == 0)
274 qemu_host_page_size = qemu_real_host_page_size;
275 if (qemu_host_page_size < TARGET_PAGE_SIZE)
276 qemu_host_page_size = TARGET_PAGE_SIZE;
277 qemu_host_page_mask = ~(qemu_host_page_size - 1);
278
279 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
280 {
281 #ifdef HAVE_KINFO_GETVMMAP
282 struct kinfo_vmentry *freep;
283 int i, cnt;
284
285 freep = kinfo_getvmmap(getpid(), &cnt);
286 if (freep) {
287 mmap_lock();
288 for (i = 0; i < cnt; i++) {
289 unsigned long startaddr, endaddr;
290
291 startaddr = freep[i].kve_start;
292 endaddr = freep[i].kve_end;
293 if (h2g_valid(startaddr)) {
294 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
295
296 if (h2g_valid(endaddr)) {
297 endaddr = h2g(endaddr);
298 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
299 } else {
300 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
301 endaddr = ~0ul;
302 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
303 #endif
304 }
305 }
306 }
307 free(freep);
308 mmap_unlock();
309 }
310 #else
311 FILE *f;
312
313 last_brk = (unsigned long)sbrk(0);
314
315 f = fopen("/compat/linux/proc/self/maps", "r");
316 if (f) {
317 mmap_lock();
318
319 do {
320 unsigned long startaddr, endaddr;
321 int n;
322
323 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
324
325 if (n == 2 && h2g_valid(startaddr)) {
326 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
327
328 if (h2g_valid(endaddr)) {
329 endaddr = h2g(endaddr);
330 } else {
331 endaddr = ~0ul;
332 }
333 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
334 }
335 } while (!feof(f));
336
337 fclose(f);
338 mmap_unlock();
339 }
340 #endif
341 }
342 #endif
343 }
344
345 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
346 {
347 PageDesc *pd;
348 void **lp;
349 int i;
350
351 #if defined(CONFIG_USER_ONLY)
352 /* We can't use g_malloc because it may recurse into a locked mutex. */
353 # define ALLOC(P, SIZE) \
354 do { \
355 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
356 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
357 } while (0)
358 #else
359 # define ALLOC(P, SIZE) \
360 do { P = g_malloc0(SIZE); } while (0)
361 #endif
362
363 /* Level 1. Always allocated. */
364 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
365
366 /* Level 2..N-1. */
367 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
368 void **p = *lp;
369
370 if (p == NULL) {
371 if (!alloc) {
372 return NULL;
373 }
374 ALLOC(p, sizeof(void *) * L2_SIZE);
375 *lp = p;
376 }
377
378 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
379 }
380
381 pd = *lp;
382 if (pd == NULL) {
383 if (!alloc) {
384 return NULL;
385 }
386 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
387 *lp = pd;
388 }
389
390 #undef ALLOC
391
392 return pd + (index & (L2_SIZE - 1));
393 }
394
395 static inline PageDesc *page_find(tb_page_addr_t index)
396 {
397 return page_find_alloc(index, 0);
398 }
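
/* A minimal usage sketch, not part of the original file (assumes 'addr' is a
   code address already converted to a tb_page_addr_t): */
#if 0
{
    PageDesc *p = page_find(addr >> TARGET_PAGE_BITS);

    if (p && p->first_tb) {
        /* at least one TB intersects this guest page */
    }
}
#endif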
399
400 #if !defined(CONFIG_USER_ONLY)
401
402 static void phys_map_node_reserve(unsigned nodes)
403 {
404 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
405 typedef PhysPageEntry Node[L2_SIZE];
406 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
407 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
408 phys_map_nodes_nb + nodes);
409 phys_map_nodes = g_renew(Node, phys_map_nodes,
410 phys_map_nodes_nb_alloc);
411 }
412 }
413
414 static uint16_t phys_map_node_alloc(void)
415 {
416 unsigned i;
417 uint16_t ret;
418
419 ret = phys_map_nodes_nb++;
420 assert(ret != PHYS_MAP_NODE_NIL);
421 assert(ret != phys_map_nodes_nb_alloc);
422 for (i = 0; i < L2_SIZE; ++i) {
423 phys_map_nodes[ret][i].is_leaf = 0;
424 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
425 }
426 return ret;
427 }
428
429 static void phys_map_nodes_reset(void)
430 {
431 phys_map_nodes_nb = 0;
432 }
433
434
435 static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
436 target_phys_addr_t *nb, uint16_t leaf,
437 int level)
438 {
439 PhysPageEntry *p;
440 int i;
441 target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
442
443 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
444 lp->ptr = phys_map_node_alloc();
445 p = phys_map_nodes[lp->ptr];
446 if (level == 0) {
447 for (i = 0; i < L2_SIZE; i++) {
448 p[i].is_leaf = 1;
449 p[i].ptr = phys_section_unassigned;
450 }
451 }
452 } else {
453 p = phys_map_nodes[lp->ptr];
454 }
455 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
456
457 while (*nb && lp < &p[L2_SIZE]) {
458 if ((*index & (step - 1)) == 0 && *nb >= step) {
459 lp->is_leaf = true;
460 lp->ptr = leaf;
461 *index += step;
462 *nb -= step;
463 } else {
464 phys_page_set_level(lp, index, nb, leaf, level - 1);
465 }
466 ++lp;
467 }
468 }
469
470 static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
471 uint16_t leaf)
472 {
473 /* Wildly overreserve - it doesn't matter much. */
474 phys_map_node_reserve(3 * P_L2_LEVELS);
475
476 phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
477 }
478
479 MemoryRegionSection *phys_page_find(target_phys_addr_t index)
480 {
481 PhysPageEntry lp = phys_map;
482 PhysPageEntry *p;
483 int i;
484 uint16_t s_index = phys_section_unassigned;
485
486 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
487 if (lp.ptr == PHYS_MAP_NODE_NIL) {
488 goto not_found;
489 }
490 p = phys_map_nodes[lp.ptr];
491 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
492 }
493
494 s_index = lp.ptr;
495 not_found:
496 return &phys_sections[s_index];
497 }
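
/* How the two halves pair up, sketched for illustration only (a real caller
   gets the uint16_t section index back when the MemoryRegionSection is
   registered; here we simply reuse phys_section_unassigned): */
#if 0
{
    target_phys_addr_t addr = 0; /* some guest-physical address */
    MemoryRegionSection *section;

    /* map one target page to the chosen section ... */
    phys_page_set(addr >> TARGET_PAGE_BITS, 1, phys_section_unassigned);
    /* ... and resolve the same page again */
    section = phys_page_find(addr >> TARGET_PAGE_BITS);
}
#endif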
498
499 bool memory_region_is_unassigned(MemoryRegion *mr)
500 {
501 return mr != &io_mem_ram && mr != &io_mem_rom
502 && mr != &io_mem_notdirty && !mr->rom_device
503 && mr != &io_mem_watch;
504 }
505
506 #define mmap_lock() do { } while(0)
507 #define mmap_unlock() do { } while(0)
508 #endif
509
510 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
511
512 #if defined(CONFIG_USER_ONLY)
513 /* Currently it is not recommended to allocate big chunks of data in
514    user mode. This will change once a dedicated libc is used. */
515 #define USE_STATIC_CODE_GEN_BUFFER
516 #endif
517
518 #ifdef USE_STATIC_CODE_GEN_BUFFER
519 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
520 __attribute__((aligned (CODE_GEN_ALIGN)));
521 #endif
522
523 static void code_gen_alloc(unsigned long tb_size)
524 {
525 #ifdef USE_STATIC_CODE_GEN_BUFFER
526 code_gen_buffer = static_code_gen_buffer;
527 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
528 map_exec(code_gen_buffer, code_gen_buffer_size);
529 #else
530 code_gen_buffer_size = tb_size;
531 if (code_gen_buffer_size == 0) {
532 #if defined(CONFIG_USER_ONLY)
533 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
534 #else
535 /* XXX: needs adjustments */
536 code_gen_buffer_size = (unsigned long)(ram_size / 4);
537 #endif
538 }
539 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
540 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
541 /* The code gen buffer location may have constraints depending on
542 the host cpu and OS */
543 #if defined(__linux__)
544 {
545 int flags;
546 void *start = NULL;
547
548 flags = MAP_PRIVATE | MAP_ANONYMOUS;
549 #if defined(__x86_64__)
550 flags |= MAP_32BIT;
551 /* Cannot map more than that */
552 if (code_gen_buffer_size > (800 * 1024 * 1024))
553 code_gen_buffer_size = (800 * 1024 * 1024);
554 #elif defined(__sparc_v9__)
555 // Map the buffer below 2G, so we can use direct calls and branches
556 flags |= MAP_FIXED;
557 start = (void *) 0x60000000UL;
558 if (code_gen_buffer_size > (512 * 1024 * 1024))
559 code_gen_buffer_size = (512 * 1024 * 1024);
560 #elif defined(__arm__)
561 /* Keep the buffer no bigger than 16MB to branch between blocks */
562 if (code_gen_buffer_size > 16 * 1024 * 1024)
563 code_gen_buffer_size = 16 * 1024 * 1024;
564 #elif defined(__s390x__)
565 /* Map the buffer so that we can use direct calls and branches. */
566 /* We have a +- 4GB range on the branches; leave some slop. */
567 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
568 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
569 }
570 start = (void *)0x90000000UL;
571 #endif
572 code_gen_buffer = mmap(start, code_gen_buffer_size,
573 PROT_WRITE | PROT_READ | PROT_EXEC,
574 flags, -1, 0);
575 if (code_gen_buffer == MAP_FAILED) {
576 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
577 exit(1);
578 }
579 }
580 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
581 || defined(__DragonFly__) || defined(__OpenBSD__) \
582 || defined(__NetBSD__)
583 {
584 int flags;
585 void *addr = NULL;
586 flags = MAP_PRIVATE | MAP_ANONYMOUS;
587 #if defined(__x86_64__)
588 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
589 * 0x40000000 is free */
590 flags |= MAP_FIXED;
591 addr = (void *)0x40000000;
592 /* Cannot map more than that */
593 if (code_gen_buffer_size > (800 * 1024 * 1024))
594 code_gen_buffer_size = (800 * 1024 * 1024);
595 #elif defined(__sparc_v9__)
596 // Map the buffer below 2G, so we can use direct calls and branches
597 flags |= MAP_FIXED;
598 addr = (void *) 0x60000000UL;
599 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
600 code_gen_buffer_size = (512 * 1024 * 1024);
601 }
602 #endif
603 code_gen_buffer = mmap(addr, code_gen_buffer_size,
604 PROT_WRITE | PROT_READ | PROT_EXEC,
605 flags, -1, 0);
606 if (code_gen_buffer == MAP_FAILED) {
607 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
608 exit(1);
609 }
610 }
611 #else
612 code_gen_buffer = g_malloc(code_gen_buffer_size);
613 map_exec(code_gen_buffer, code_gen_buffer_size);
614 #endif
615 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
616 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
617 code_gen_buffer_max_size = code_gen_buffer_size -
618 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
619 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
620 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
621 }
622
623 /* Must be called before using the QEMU cpus. 'tb_size' is the size
624 (in bytes) allocated to the translation buffer. Zero means default
625 size. */
626 void tcg_exec_init(unsigned long tb_size)
627 {
628 cpu_gen_init();
629 code_gen_alloc(tb_size);
630 code_gen_ptr = code_gen_buffer;
631 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
632 page_init();
633 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
634 /* There's no guest base to take into account, so go ahead and
635 initialize the prologue now. */
636 tcg_prologue_init(&tcg_ctx);
637 #endif
638 }
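
/* Illustrative only -- the real call site lives in the target start-up code,
   not in this file: */
#if 0
    tcg_exec_init(0);   /* 0 means "use the default translation buffer size" */
#endif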
639
640 bool tcg_enabled(void)
641 {
642 return code_gen_buffer != NULL;
643 }
644
645 void cpu_exec_init_all(void)
646 {
647 #if !defined(CONFIG_USER_ONLY)
648 memory_map_init();
649 io_mem_init();
650 #endif
651 }
652
653 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
654
655 static int cpu_common_post_load(void *opaque, int version_id)
656 {
657 CPUArchState *env = opaque;
658
659 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
660 version_id is increased. */
661 env->interrupt_request &= ~0x01;
662 tlb_flush(env, 1);
663
664 return 0;
665 }
666
667 static const VMStateDescription vmstate_cpu_common = {
668 .name = "cpu_common",
669 .version_id = 1,
670 .minimum_version_id = 1,
671 .minimum_version_id_old = 1,
672 .post_load = cpu_common_post_load,
673 .fields = (VMStateField []) {
674 VMSTATE_UINT32(halted, CPUArchState),
675 VMSTATE_UINT32(interrupt_request, CPUArchState),
676 VMSTATE_END_OF_LIST()
677 }
678 };
679 #endif
680
681 CPUArchState *qemu_get_cpu(int cpu)
682 {
683 CPUArchState *env = first_cpu;
684
685 while (env) {
686 if (env->cpu_index == cpu)
687 break;
688 env = env->next_cpu;
689 }
690
691 return env;
692 }
693
694 void cpu_exec_init(CPUArchState *env)
695 {
696 CPUArchState **penv;
697 int cpu_index;
698
699 #if defined(CONFIG_USER_ONLY)
700 cpu_list_lock();
701 #endif
702 env->next_cpu = NULL;
703 penv = &first_cpu;
704 cpu_index = 0;
705 while (*penv != NULL) {
706 penv = &(*penv)->next_cpu;
707 cpu_index++;
708 }
709 env->cpu_index = cpu_index;
710 env->numa_node = 0;
711 QTAILQ_INIT(&env->breakpoints);
712 QTAILQ_INIT(&env->watchpoints);
713 #ifndef CONFIG_USER_ONLY
714 env->thread_id = qemu_get_thread_id();
715 #endif
716 *penv = env;
717 #if defined(CONFIG_USER_ONLY)
718 cpu_list_unlock();
719 #endif
720 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
721 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
722 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
723 cpu_save, cpu_load, env);
724 #endif
725 }
726
727 /* Allocate a new translation block. Flush the translation buffer if
728 too many translation blocks or too much generated code. */
729 static TranslationBlock *tb_alloc(target_ulong pc)
730 {
731 TranslationBlock *tb;
732
733 if (nb_tbs >= code_gen_max_blocks ||
734 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
735 return NULL;
736 tb = &tbs[nb_tbs++];
737 tb->pc = pc;
738 tb->cflags = 0;
739 return tb;
740 }
741
742 void tb_free(TranslationBlock *tb)
743 {
744     /* In practice this is mostly used for single-use temporary TBs.
745        Ignore the hard cases and just back up if this TB happens to
746        be the last one generated. */
747 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
748 code_gen_ptr = tb->tc_ptr;
749 nb_tbs--;
750 }
751 }
752
753 static inline void invalidate_page_bitmap(PageDesc *p)
754 {
755 if (p->code_bitmap) {
756 g_free(p->code_bitmap);
757 p->code_bitmap = NULL;
758 }
759 p->code_write_count = 0;
760 }
761
762 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
763
764 static void page_flush_tb_1 (int level, void **lp)
765 {
766 int i;
767
768 if (*lp == NULL) {
769 return;
770 }
771 if (level == 0) {
772 PageDesc *pd = *lp;
773 for (i = 0; i < L2_SIZE; ++i) {
774 pd[i].first_tb = NULL;
775 invalidate_page_bitmap(pd + i);
776 }
777 } else {
778 void **pp = *lp;
779 for (i = 0; i < L2_SIZE; ++i) {
780 page_flush_tb_1 (level - 1, pp + i);
781 }
782 }
783 }
784
785 static void page_flush_tb(void)
786 {
787 int i;
788 for (i = 0; i < V_L1_SIZE; i++) {
789 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
790 }
791 }
792
793 /* flush all the translation blocks */
794 /* XXX: tb_flush is currently not thread safe */
795 void tb_flush(CPUArchState *env1)
796 {
797 CPUArchState *env;
798 #if defined(DEBUG_FLUSH)
799 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
800 (unsigned long)(code_gen_ptr - code_gen_buffer),
801 nb_tbs, nb_tbs > 0 ?
802 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
803 #endif
804 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
805 cpu_abort(env1, "Internal error: code buffer overflow\n");
806
807 nb_tbs = 0;
808
809 for(env = first_cpu; env != NULL; env = env->next_cpu) {
810 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
811 }
812
813 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
814 page_flush_tb();
815
816 code_gen_ptr = code_gen_buffer;
817 /* XXX: flush processor icache at this point if cache flush is
818 expensive */
819 tb_flush_count++;
820 }
821
822 #ifdef DEBUG_TB_CHECK
823
824 static void tb_invalidate_check(target_ulong address)
825 {
826 TranslationBlock *tb;
827 int i;
828 address &= TARGET_PAGE_MASK;
829 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
830 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
831 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
832 address >= tb->pc + tb->size)) {
833 printf("ERROR invalidate: address=" TARGET_FMT_lx
834 " PC=%08lx size=%04x\n",
835 address, (long)tb->pc, tb->size);
836 }
837 }
838 }
839 }
840
841 /* verify that all the pages have correct rights for code */
842 static void tb_page_check(void)
843 {
844 TranslationBlock *tb;
845 int i, flags1, flags2;
846
847 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
848 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
849 flags1 = page_get_flags(tb->pc);
850 flags2 = page_get_flags(tb->pc + tb->size - 1);
851 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
852 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
853 (long)tb->pc, tb->size, flags1, flags2);
854 }
855 }
856 }
857 }
858
859 #endif
860
861 /* invalidate one TB */
862 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
863 int next_offset)
864 {
865 TranslationBlock *tb1;
866 for(;;) {
867 tb1 = *ptb;
868 if (tb1 == tb) {
869 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
870 break;
871 }
872 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
873 }
874 }
875
876 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
877 {
878 TranslationBlock *tb1;
879 unsigned int n1;
880
881 for(;;) {
882 tb1 = *ptb;
883 n1 = (uintptr_t)tb1 & 3;
884 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
885 if (tb1 == tb) {
886 *ptb = tb1->page_next[n1];
887 break;
888 }
889 ptb = &tb1->page_next[n1];
890 }
891 }
892
893 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
894 {
895 TranslationBlock *tb1, **ptb;
896 unsigned int n1;
897
898 ptb = &tb->jmp_next[n];
899 tb1 = *ptb;
900 if (tb1) {
901 /* find tb(n) in circular list */
902 for(;;) {
903 tb1 = *ptb;
904 n1 = (uintptr_t)tb1 & 3;
905 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
906 if (n1 == n && tb1 == tb)
907 break;
908 if (n1 == 2) {
909 ptb = &tb1->jmp_first;
910 } else {
911 ptb = &tb1->jmp_next[n1];
912 }
913 }
914 /* now we can suppress tb(n) from the list */
915 *ptb = tb->jmp_next[n];
916
917 tb->jmp_next[n] = NULL;
918 }
919 }
920
921 /* reset the jump entry 'n' of a TB so that it is not chained to
922 another TB */
923 static inline void tb_reset_jump(TranslationBlock *tb, int n)
924 {
925 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
926 }
927
928 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
929 {
930 CPUArchState *env;
931 PageDesc *p;
932 unsigned int h, n1;
933 tb_page_addr_t phys_pc;
934 TranslationBlock *tb1, *tb2;
935
936 /* remove the TB from the hash list */
937 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
938 h = tb_phys_hash_func(phys_pc);
939 tb_remove(&tb_phys_hash[h], tb,
940 offsetof(TranslationBlock, phys_hash_next));
941
942 /* remove the TB from the page list */
943 if (tb->page_addr[0] != page_addr) {
944 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
945 tb_page_remove(&p->first_tb, tb);
946 invalidate_page_bitmap(p);
947 }
948 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
949 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
950 tb_page_remove(&p->first_tb, tb);
951 invalidate_page_bitmap(p);
952 }
953
954 tb_invalidated_flag = 1;
955
956 /* remove the TB from the hash list */
957 h = tb_jmp_cache_hash_func(tb->pc);
958 for(env = first_cpu; env != NULL; env = env->next_cpu) {
959 if (env->tb_jmp_cache[h] == tb)
960 env->tb_jmp_cache[h] = NULL;
961 }
962
963 /* suppress this TB from the two jump lists */
964 tb_jmp_remove(tb, 0);
965 tb_jmp_remove(tb, 1);
966
967 /* suppress any remaining jumps to this TB */
968 tb1 = tb->jmp_first;
969 for(;;) {
970 n1 = (uintptr_t)tb1 & 3;
971 if (n1 == 2)
972 break;
973 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
974 tb2 = tb1->jmp_next[n1];
975 tb_reset_jump(tb1, n1);
976 tb1->jmp_next[n1] = NULL;
977 tb1 = tb2;
978 }
979 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
980
981 tb_phys_invalidate_count++;
982 }
983
984 static inline void set_bits(uint8_t *tab, int start, int len)
985 {
986 int end, mask, end1;
987
988 end = start + len;
989 tab += start >> 3;
990 mask = 0xff << (start & 7);
991 if ((start & ~7) == (end & ~7)) {
992 if (start < end) {
993 mask &= ~(0xff << (end & 7));
994 *tab |= mask;
995 }
996 } else {
997 *tab++ |= mask;
998 start = (start + 8) & ~7;
999 end1 = end & ~7;
1000 while (start < end1) {
1001 *tab++ = 0xff;
1002 start += 8;
1003 }
1004 if (start < end) {
1005 mask = ~(0xff << (end & 7));
1006 *tab |= mask;
1007 }
1008 }
1009 }
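
/* Worked example (illustrative): set_bits(tab, 10, 4) marks bits 10..13,
   i.e. it performs tab[1] |= 0x3c, since bit 10 is bit 2 of byte 1.
   build_page_bitmap() below relies on exactly this per-byte layout. */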
1010
1011 static void build_page_bitmap(PageDesc *p)
1012 {
1013 int n, tb_start, tb_end;
1014 TranslationBlock *tb;
1015
1016 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1017
1018 tb = p->first_tb;
1019 while (tb != NULL) {
1020 n = (uintptr_t)tb & 3;
1021 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1022 /* NOTE: this is subtle as a TB may span two physical pages */
1023 if (n == 0) {
1024 /* NOTE: tb_end may be after the end of the page, but
1025 it is not a problem */
1026 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1027 tb_end = tb_start + tb->size;
1028 if (tb_end > TARGET_PAGE_SIZE)
1029 tb_end = TARGET_PAGE_SIZE;
1030 } else {
1031 tb_start = 0;
1032 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1033 }
1034 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1035 tb = tb->page_next[n];
1036 }
1037 }
1038
1039 TranslationBlock *tb_gen_code(CPUArchState *env,
1040 target_ulong pc, target_ulong cs_base,
1041 int flags, int cflags)
1042 {
1043 TranslationBlock *tb;
1044 uint8_t *tc_ptr;
1045 tb_page_addr_t phys_pc, phys_page2;
1046 target_ulong virt_page2;
1047 int code_gen_size;
1048
1049 phys_pc = get_page_addr_code(env, pc);
1050 tb = tb_alloc(pc);
1051 if (!tb) {
1052 /* flush must be done */
1053 tb_flush(env);
1054 /* cannot fail at this point */
1055 tb = tb_alloc(pc);
1056 /* Don't forget to invalidate previous TB info. */
1057 tb_invalidated_flag = 1;
1058 }
1059 tc_ptr = code_gen_ptr;
1060 tb->tc_ptr = tc_ptr;
1061 tb->cs_base = cs_base;
1062 tb->flags = flags;
1063 tb->cflags = cflags;
1064 cpu_gen_code(env, tb, &code_gen_size);
1065 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1066 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1067
1068 /* check next page if needed */
1069 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1070 phys_page2 = -1;
1071 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1072 phys_page2 = get_page_addr_code(env, virt_page2);
1073 }
1074 tb_link_page(tb, phys_pc, phys_page2);
1075 return tb;
1076 }
1077
1078 /*
1079 * Invalidate all TBs which intersect with the target physical address range
1080 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1081 * 'is_cpu_write_access' should be true if called from a real cpu write
1082 * access: the virtual CPU will exit the current TB if code is modified inside
1083 * this TB.
1084 */
1085 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1086 int is_cpu_write_access)
1087 {
1088 while (start < end) {
1089 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1090 start &= TARGET_PAGE_MASK;
1091 start += TARGET_PAGE_SIZE;
1092 }
1093 }
1094
1095 /*
1096 * Invalidate all TBs which intersect with the target physical address range
1097 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1098 * 'is_cpu_write_access' should be true if called from a real cpu write
1099 * access: the virtual CPU will exit the current TB if code is modified inside
1100 * this TB.
1101 */
1102 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1103 int is_cpu_write_access)
1104 {
1105 TranslationBlock *tb, *tb_next, *saved_tb;
1106 CPUArchState *env = cpu_single_env;
1107 tb_page_addr_t tb_start, tb_end;
1108 PageDesc *p;
1109 int n;
1110 #ifdef TARGET_HAS_PRECISE_SMC
1111 int current_tb_not_found = is_cpu_write_access;
1112 TranslationBlock *current_tb = NULL;
1113 int current_tb_modified = 0;
1114 target_ulong current_pc = 0;
1115 target_ulong current_cs_base = 0;
1116 int current_flags = 0;
1117 #endif /* TARGET_HAS_PRECISE_SMC */
1118
1119 p = page_find(start >> TARGET_PAGE_BITS);
1120 if (!p)
1121 return;
1122 if (!p->code_bitmap &&
1123 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1124 is_cpu_write_access) {
1125 /* build code bitmap */
1126 build_page_bitmap(p);
1127 }
1128
1129 /* we remove all the TBs in the range [start, end[ */
1130 /* XXX: see if in some cases it could be faster to invalidate all the code */
1131 tb = p->first_tb;
1132 while (tb != NULL) {
1133 n = (uintptr_t)tb & 3;
1134 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1135 tb_next = tb->page_next[n];
1136 /* NOTE: this is subtle as a TB may span two physical pages */
1137 if (n == 0) {
1138 /* NOTE: tb_end may be after the end of the page, but
1139 it is not a problem */
1140 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1141 tb_end = tb_start + tb->size;
1142 } else {
1143 tb_start = tb->page_addr[1];
1144 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1145 }
1146 if (!(tb_end <= start || tb_start >= end)) {
1147 #ifdef TARGET_HAS_PRECISE_SMC
1148 if (current_tb_not_found) {
1149 current_tb_not_found = 0;
1150 current_tb = NULL;
1151 if (env->mem_io_pc) {
1152 /* now we have a real cpu fault */
1153 current_tb = tb_find_pc(env->mem_io_pc);
1154 }
1155 }
1156 if (current_tb == tb &&
1157 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1158 /* If we are modifying the current TB, we must stop
1159 its execution. We could be more precise by checking
1160 that the modification is after the current PC, but it
1161 would require a specialized function to partially
1162 restore the CPU state */
1163
1164 current_tb_modified = 1;
1165 cpu_restore_state(current_tb, env, env->mem_io_pc);
1166 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1167 &current_flags);
1168 }
1169 #endif /* TARGET_HAS_PRECISE_SMC */
1170 /* we need to do that to handle the case where a signal
1171 occurs while doing tb_phys_invalidate() */
1172 saved_tb = NULL;
1173 if (env) {
1174 saved_tb = env->current_tb;
1175 env->current_tb = NULL;
1176 }
1177 tb_phys_invalidate(tb, -1);
1178 if (env) {
1179 env->current_tb = saved_tb;
1180 if (env->interrupt_request && env->current_tb)
1181 cpu_interrupt(env, env->interrupt_request);
1182 }
1183 }
1184 tb = tb_next;
1185 }
1186 #if !defined(CONFIG_USER_ONLY)
1187 /* if no code remaining, no need to continue to use slow writes */
1188 if (!p->first_tb) {
1189 invalidate_page_bitmap(p);
1190 if (is_cpu_write_access) {
1191 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1192 }
1193 }
1194 #endif
1195 #ifdef TARGET_HAS_PRECISE_SMC
1196 if (current_tb_modified) {
1197 /* we generate a block containing just the instruction
1198 modifying the memory. It will ensure that it cannot modify
1199 itself */
1200 env->current_tb = NULL;
1201 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1202 cpu_resume_from_signal(env, NULL);
1203 }
1204 #endif
1205 }
1206
1207 /* len must be <= 8 and start must be a multiple of len */
1208 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1209 {
1210 PageDesc *p;
1211 int offset, b;
1212 #if 0
1213 if (1) {
1214 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1215 cpu_single_env->mem_io_vaddr, len,
1216 cpu_single_env->eip,
1217 cpu_single_env->eip +
1218 (intptr_t)cpu_single_env->segs[R_CS].base);
1219 }
1220 #endif
1221 p = page_find(start >> TARGET_PAGE_BITS);
1222 if (!p)
1223 return;
1224 if (p->code_bitmap) {
1225 offset = start & ~TARGET_PAGE_MASK;
1226 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1227 if (b & ((1 << len) - 1))
1228 goto do_invalidate;
1229 } else {
1230 do_invalidate:
1231 tb_invalidate_phys_page_range(start, start + len, 1);
1232 }
1233 }
1234
1235 #if !defined(CONFIG_SOFTMMU)
1236 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1237 uintptr_t pc, void *puc)
1238 {
1239 TranslationBlock *tb;
1240 PageDesc *p;
1241 int n;
1242 #ifdef TARGET_HAS_PRECISE_SMC
1243 TranslationBlock *current_tb = NULL;
1244 CPUArchState *env = cpu_single_env;
1245 int current_tb_modified = 0;
1246 target_ulong current_pc = 0;
1247 target_ulong current_cs_base = 0;
1248 int current_flags = 0;
1249 #endif
1250
1251 addr &= TARGET_PAGE_MASK;
1252 p = page_find(addr >> TARGET_PAGE_BITS);
1253 if (!p)
1254 return;
1255 tb = p->first_tb;
1256 #ifdef TARGET_HAS_PRECISE_SMC
1257 if (tb && pc != 0) {
1258 current_tb = tb_find_pc(pc);
1259 }
1260 #endif
1261 while (tb != NULL) {
1262 n = (uintptr_t)tb & 3;
1263 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1264 #ifdef TARGET_HAS_PRECISE_SMC
1265 if (current_tb == tb &&
1266 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1267 /* If we are modifying the current TB, we must stop
1268 its execution. We could be more precise by checking
1269 that the modification is after the current PC, but it
1270 would require a specialized function to partially
1271 restore the CPU state */
1272
1273 current_tb_modified = 1;
1274 cpu_restore_state(current_tb, env, pc);
1275 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1276 &current_flags);
1277 }
1278 #endif /* TARGET_HAS_PRECISE_SMC */
1279 tb_phys_invalidate(tb, addr);
1280 tb = tb->page_next[n];
1281 }
1282 p->first_tb = NULL;
1283 #ifdef TARGET_HAS_PRECISE_SMC
1284 if (current_tb_modified) {
1285 /* we generate a block containing just the instruction
1286 modifying the memory. It will ensure that it cannot modify
1287 itself */
1288 env->current_tb = NULL;
1289 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1290 cpu_resume_from_signal(env, puc);
1291 }
1292 #endif
1293 }
1294 #endif
1295
1296 /* add the tb in the target page and protect it if necessary */
1297 static inline void tb_alloc_page(TranslationBlock *tb,
1298 unsigned int n, tb_page_addr_t page_addr)
1299 {
1300 PageDesc *p;
1301 #ifndef CONFIG_USER_ONLY
1302 bool page_already_protected;
1303 #endif
1304
1305 tb->page_addr[n] = page_addr;
1306 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1307 tb->page_next[n] = p->first_tb;
1308 #ifndef CONFIG_USER_ONLY
1309 page_already_protected = p->first_tb != NULL;
1310 #endif
1311 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1312 invalidate_page_bitmap(p);
1313
1314 #if defined(TARGET_HAS_SMC) || 1
1315
1316 #if defined(CONFIG_USER_ONLY)
1317 if (p->flags & PAGE_WRITE) {
1318 target_ulong addr;
1319 PageDesc *p2;
1320 int prot;
1321
1322         /* force the host page to be non-writable (writes will take a
1323            page fault + mprotect overhead) */
1324 page_addr &= qemu_host_page_mask;
1325 prot = 0;
1326 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1327 addr += TARGET_PAGE_SIZE) {
1328
1329 p2 = page_find (addr >> TARGET_PAGE_BITS);
1330 if (!p2)
1331 continue;
1332 prot |= p2->flags;
1333 p2->flags &= ~PAGE_WRITE;
1334 }
1335 mprotect(g2h(page_addr), qemu_host_page_size,
1336 (prot & PAGE_BITS) & ~PAGE_WRITE);
1337 #ifdef DEBUG_TB_INVALIDATE
1338 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1339 page_addr);
1340 #endif
1341 }
1342 #else
1343 /* if some code is already present, then the pages are already
1344 protected. So we handle the case where only the first TB is
1345 allocated in a physical page */
1346 if (!page_already_protected) {
1347 tlb_protect_code(page_addr);
1348 }
1349 #endif
1350
1351 #endif /* TARGET_HAS_SMC */
1352 }
1353
1354 /* add a new TB and link it to the physical page tables. phys_page2 is
1355 (-1) to indicate that only one page contains the TB. */
1356 void tb_link_page(TranslationBlock *tb,
1357 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1358 {
1359 unsigned int h;
1360 TranslationBlock **ptb;
1361
1362 /* Grab the mmap lock to stop another thread invalidating this TB
1363 before we are done. */
1364 mmap_lock();
1365 /* add in the physical hash table */
1366 h = tb_phys_hash_func(phys_pc);
1367 ptb = &tb_phys_hash[h];
1368 tb->phys_hash_next = *ptb;
1369 *ptb = tb;
1370
1371 /* add in the page list */
1372 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1373 if (phys_page2 != -1)
1374 tb_alloc_page(tb, 1, phys_page2);
1375 else
1376 tb->page_addr[1] = -1;
1377
1378 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1379 tb->jmp_next[0] = NULL;
1380 tb->jmp_next[1] = NULL;
1381
1382 /* init original jump addresses */
1383 if (tb->tb_next_offset[0] != 0xffff)
1384 tb_reset_jump(tb, 0);
1385 if (tb->tb_next_offset[1] != 0xffff)
1386 tb_reset_jump(tb, 1);
1387
1388 #ifdef DEBUG_TB_CHECK
1389 tb_page_check();
1390 #endif
1391 mmap_unlock();
1392 }
1393
1394 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1395 tb[1].tc_ptr. Return NULL if not found */
1396 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1397 {
1398 int m_min, m_max, m;
1399 uintptr_t v;
1400 TranslationBlock *tb;
1401
1402 if (nb_tbs <= 0)
1403 return NULL;
1404 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1405 tc_ptr >= (uintptr_t)code_gen_ptr) {
1406 return NULL;
1407 }
1408 /* binary search (cf Knuth) */
1409 m_min = 0;
1410 m_max = nb_tbs - 1;
1411 while (m_min <= m_max) {
1412 m = (m_min + m_max) >> 1;
1413 tb = &tbs[m];
1414 v = (uintptr_t)tb->tc_ptr;
1415 if (v == tc_ptr)
1416 return tb;
1417 else if (tc_ptr < v) {
1418 m_max = m - 1;
1419 } else {
1420 m_min = m + 1;
1421 }
1422 }
1423 return &tbs[m_max];
1424 }
1425
1426 static void tb_reset_jump_recursive(TranslationBlock *tb);
1427
1428 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1429 {
1430 TranslationBlock *tb1, *tb_next, **ptb;
1431 unsigned int n1;
1432
1433 tb1 = tb->jmp_next[n];
1434 if (tb1 != NULL) {
1435 /* find head of list */
1436 for(;;) {
1437 n1 = (uintptr_t)tb1 & 3;
1438 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1439 if (n1 == 2)
1440 break;
1441 tb1 = tb1->jmp_next[n1];
1442 }
1443         /* we are now sure that tb jumps to tb1 */
1444 tb_next = tb1;
1445
1446 /* remove tb from the jmp_first list */
1447 ptb = &tb_next->jmp_first;
1448 for(;;) {
1449 tb1 = *ptb;
1450 n1 = (uintptr_t)tb1 & 3;
1451 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1452 if (n1 == n && tb1 == tb)
1453 break;
1454 ptb = &tb1->jmp_next[n1];
1455 }
1456 *ptb = tb->jmp_next[n];
1457 tb->jmp_next[n] = NULL;
1458
1459 /* suppress the jump to next tb in generated code */
1460 tb_reset_jump(tb, n);
1461
1462 /* suppress jumps in the tb on which we could have jumped */
1463 tb_reset_jump_recursive(tb_next);
1464 }
1465 }
1466
1467 static void tb_reset_jump_recursive(TranslationBlock *tb)
1468 {
1469 tb_reset_jump_recursive2(tb, 0);
1470 tb_reset_jump_recursive2(tb, 1);
1471 }
1472
1473 #if defined(TARGET_HAS_ICE)
1474 #if defined(CONFIG_USER_ONLY)
1475 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1476 {
1477 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1478 }
1479 #else
1480 void tb_invalidate_phys_addr(target_phys_addr_t addr)
1481 {
1482 ram_addr_t ram_addr;
1483 MemoryRegionSection *section;
1484
1485 section = phys_page_find(addr >> TARGET_PAGE_BITS);
1486 if (!(memory_region_is_ram(section->mr)
1487 || (section->mr->rom_device && section->mr->readable))) {
1488 return;
1489 }
1490 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1491 + memory_region_section_addr(section, addr);
1492 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1493 }
1494
1495 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1496 {
1497 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1498 (pc & ~TARGET_PAGE_MASK));
1499 }
1500 #endif
1501 #endif /* TARGET_HAS_ICE */
1502
1503 #if defined(CONFIG_USER_ONLY)
1504 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1505
1506 {
1507 }
1508
1509 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1510 int flags, CPUWatchpoint **watchpoint)
1511 {
1512 return -ENOSYS;
1513 }
1514 #else
1515 /* Add a watchpoint. */
1516 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1517 int flags, CPUWatchpoint **watchpoint)
1518 {
1519 target_ulong len_mask = ~(len - 1);
1520 CPUWatchpoint *wp;
1521
1522 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1523 if ((len & (len - 1)) || (addr & ~len_mask) ||
1524 len == 0 || len > TARGET_PAGE_SIZE) {
1525 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1526 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1527 return -EINVAL;
1528 }
1529 wp = g_malloc(sizeof(*wp));
1530
1531 wp->vaddr = addr;
1532 wp->len_mask = len_mask;
1533 wp->flags = flags;
1534
1535 /* keep all GDB-injected watchpoints in front */
1536 if (flags & BP_GDB)
1537 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1538 else
1539 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1540
1541 tlb_flush_page(env, addr);
1542
1543 if (watchpoint)
1544 *watchpoint = wp;
1545 return 0;
1546 }
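
/* A minimal usage sketch (illustrative; assumes 'env' and 'addr' are in
   scope, and that BP_MEM_WRITE comes from the CPU definitions): */
#if 0
{
    CPUWatchpoint *wp;

    /* watch 4 bytes at addr for writes; len must be a power of two and
       addr must be aligned to it, otherwise -EINVAL is returned */
    if (cpu_watchpoint_insert(env, addr, 4, BP_MEM_WRITE, &wp) == 0) {
        /* ... later, drop it again ... */
        cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif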
1547
1548 /* Remove a specific watchpoint. */
1549 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1550 int flags)
1551 {
1552 target_ulong len_mask = ~(len - 1);
1553 CPUWatchpoint *wp;
1554
1555 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1556 if (addr == wp->vaddr && len_mask == wp->len_mask
1557 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1558 cpu_watchpoint_remove_by_ref(env, wp);
1559 return 0;
1560 }
1561 }
1562 return -ENOENT;
1563 }
1564
1565 /* Remove a specific watchpoint by reference. */
1566 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1567 {
1568 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1569
1570 tlb_flush_page(env, watchpoint->vaddr);
1571
1572 g_free(watchpoint);
1573 }
1574
1575 /* Remove all matching watchpoints. */
1576 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1577 {
1578 CPUWatchpoint *wp, *next;
1579
1580 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1581 if (wp->flags & mask)
1582 cpu_watchpoint_remove_by_ref(env, wp);
1583 }
1584 }
1585 #endif
1586
1587 /* Add a breakpoint. */
1588 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1589 CPUBreakpoint **breakpoint)
1590 {
1591 #if defined(TARGET_HAS_ICE)
1592 CPUBreakpoint *bp;
1593
1594 bp = g_malloc(sizeof(*bp));
1595
1596 bp->pc = pc;
1597 bp->flags = flags;
1598
1599 /* keep all GDB-injected breakpoints in front */
1600 if (flags & BP_GDB)
1601 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1602 else
1603 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1604
1605 breakpoint_invalidate(env, pc);
1606
1607 if (breakpoint)
1608 *breakpoint = bp;
1609 return 0;
1610 #else
1611 return -ENOSYS;
1612 #endif
1613 }
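
/* The matching sketch for breakpoints (illustrative; assumes 'env' and 'pc'
   are in scope): */
#if 0
{
    CPUBreakpoint *bp;

    /* BP_GDB keeps GDB-injected breakpoints at the head of the list */
    if (cpu_breakpoint_insert(env, pc, BP_GDB, &bp) == 0) {
        cpu_breakpoint_remove_by_ref(env, bp);
    }
}
#endif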
1614
1615 /* Remove a specific breakpoint. */
1616 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1617 {
1618 #if defined(TARGET_HAS_ICE)
1619 CPUBreakpoint *bp;
1620
1621 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1622 if (bp->pc == pc && bp->flags == flags) {
1623 cpu_breakpoint_remove_by_ref(env, bp);
1624 return 0;
1625 }
1626 }
1627 return -ENOENT;
1628 #else
1629 return -ENOSYS;
1630 #endif
1631 }
1632
1633 /* Remove a specific breakpoint by reference. */
1634 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1635 {
1636 #if defined(TARGET_HAS_ICE)
1637 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1638
1639 breakpoint_invalidate(env, breakpoint->pc);
1640
1641 g_free(breakpoint);
1642 #endif
1643 }
1644
1645 /* Remove all matching breakpoints. */
1646 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1647 {
1648 #if defined(TARGET_HAS_ICE)
1649 CPUBreakpoint *bp, *next;
1650
1651 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1652 if (bp->flags & mask)
1653 cpu_breakpoint_remove_by_ref(env, bp);
1654 }
1655 #endif
1656 }
1657
1658 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1659 CPU loop after each instruction */
1660 void cpu_single_step(CPUArchState *env, int enabled)
1661 {
1662 #if defined(TARGET_HAS_ICE)
1663 if (env->singlestep_enabled != enabled) {
1664 env->singlestep_enabled = enabled;
1665 if (kvm_enabled())
1666 kvm_update_guest_debug(env, 0);
1667 else {
1668 /* must flush all the translated code to avoid inconsistencies */
1669 /* XXX: only flush what is necessary */
1670 tb_flush(env);
1671 }
1672 }
1673 #endif
1674 }
1675
1676 /* enable or disable low-level logging */
1677 void cpu_set_log(int log_flags)
1678 {
1679 loglevel = log_flags;
1680 if (loglevel && !logfile) {
1681 logfile = fopen(logfilename, log_append ? "a" : "w");
1682 if (!logfile) {
1683 perror(logfilename);
1684 _exit(1);
1685 }
1686 #if !defined(CONFIG_SOFTMMU)
1687         /* keep glibc from mmap()ing the stdio buffer by setting one "by hand" */
1688 {
1689 static char logfile_buf[4096];
1690 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1691 }
1692 #elif defined(_WIN32)
1693 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1694 setvbuf(logfile, NULL, _IONBF, 0);
1695 #else
1696 setvbuf(logfile, NULL, _IOLBF, 0);
1697 #endif
1698 log_append = 1;
1699 }
1700 if (!loglevel && logfile) {
1701 fclose(logfile);
1702 logfile = NULL;
1703 }
1704 }
1705
1706 void cpu_set_log_filename(const char *filename)
1707 {
1708 logfilename = strdup(filename);
1709 if (logfile) {
1710 fclose(logfile);
1711 logfile = NULL;
1712 }
1713 cpu_set_log(loglevel);
1714 }
1715
1716 static void cpu_unlink_tb(CPUArchState *env)
1717 {
1718     /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
1719        problem and hope the cpu will stop of its own accord.  For userspace
1720        emulation this usually isn't as bad as it sounds, since signals are
1721        primarily used to interrupt blocking syscalls. */
1722 TranslationBlock *tb;
1723 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1724
1725 spin_lock(&interrupt_lock);
1726 tb = env->current_tb;
1727 /* if the cpu is currently executing code, we must unlink it and
1728 all the potentially executing TB */
1729 if (tb) {
1730 env->current_tb = NULL;
1731 tb_reset_jump_recursive(tb);
1732 }
1733 spin_unlock(&interrupt_lock);
1734 }
1735
1736 #ifndef CONFIG_USER_ONLY
1737 /* mask must never be zero, except for A20 change call */
1738 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1739 {
1740 int old_mask;
1741
1742 old_mask = env->interrupt_request;
1743 env->interrupt_request |= mask;
1744
1745 /*
1746 * If called from iothread context, wake the target cpu in
1747      * case it is halted.
1748 */
1749 if (!qemu_cpu_is_self(env)) {
1750 qemu_cpu_kick(env);
1751 return;
1752 }
1753
1754 if (use_icount) {
1755 env->icount_decr.u16.high = 0xffff;
1756 if (!can_do_io(env)
1757 && (mask & ~old_mask) != 0) {
1758 cpu_abort(env, "Raised interrupt while not in I/O function");
1759 }
1760 } else {
1761 cpu_unlink_tb(env);
1762 }
1763 }
1764
1765 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1766
1767 #else /* CONFIG_USER_ONLY */
1768
1769 void cpu_interrupt(CPUArchState *env, int mask)
1770 {
1771 env->interrupt_request |= mask;
1772 cpu_unlink_tb(env);
1773 }
1774 #endif /* CONFIG_USER_ONLY */
1775
1776 void cpu_reset_interrupt(CPUArchState *env, int mask)
1777 {
1778 env->interrupt_request &= ~mask;
1779 }
1780
1781 void cpu_exit(CPUArchState *env)
1782 {
1783 env->exit_request = 1;
1784 cpu_unlink_tb(env);
1785 }
1786
1787 const CPULogItem cpu_log_items[] = {
1788 { CPU_LOG_TB_OUT_ASM, "out_asm",
1789 "show generated host assembly code for each compiled TB" },
1790 { CPU_LOG_TB_IN_ASM, "in_asm",
1791 "show target assembly code for each compiled TB" },
1792 { CPU_LOG_TB_OP, "op",
1793 "show micro ops for each compiled TB" },
1794 { CPU_LOG_TB_OP_OPT, "op_opt",
1795 "show micro ops "
1796 #ifdef TARGET_I386
1797 "before eflags optimization and "
1798 #endif
1799 "after liveness analysis" },
1800 { CPU_LOG_INT, "int",
1801 "show interrupts/exceptions in short format" },
1802 { CPU_LOG_EXEC, "exec",
1803 "show trace before each executed TB (lots of logs)" },
1804 { CPU_LOG_TB_CPU, "cpu",
1805 "show CPU state before block translation" },
1806 #ifdef TARGET_I386
1807 { CPU_LOG_PCALL, "pcall",
1808 "show protected mode far calls/returns/exceptions" },
1809 { CPU_LOG_RESET, "cpu_reset",
1810 "show CPU state before CPU resets" },
1811 #endif
1812 #ifdef DEBUG_IOPORT
1813 { CPU_LOG_IOPORT, "ioport",
1814 "show all i/o ports accesses" },
1815 #endif
1816 { 0, NULL, NULL },
1817 };
1818
1819 static int cmp1(const char *s1, int n, const char *s2)
1820 {
1821 if (strlen(s2) != n)
1822 return 0;
1823 return memcmp(s1, s2, n) == 0;
1824 }
1825
1826 /* takes a comma-separated list of log masks. Returns 0 on error. */
1827 int cpu_str_to_log_mask(const char *str)
1828 {
1829 const CPULogItem *item;
1830 int mask;
1831 const char *p, *p1;
1832
1833 p = str;
1834 mask = 0;
1835 for(;;) {
1836 p1 = strchr(p, ',');
1837 if (!p1)
1838 p1 = p + strlen(p);
1839 if(cmp1(p,p1-p,"all")) {
1840 for(item = cpu_log_items; item->mask != 0; item++) {
1841 mask |= item->mask;
1842 }
1843 } else {
1844 for(item = cpu_log_items; item->mask != 0; item++) {
1845 if (cmp1(p, p1 - p, item->name))
1846 goto found;
1847 }
1848 return 0;
1849 }
1850 found:
1851 mask |= item->mask;
1852 if (*p1 != ',')
1853 break;
1854 p = p1 + 1;
1855 }
1856 return mask;
1857 }
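
/* Illustrative pairing with cpu_set_log() -- roughly what "-d in_asm,exec"
   does on the command line: */
#if 0
{
    int mask = cpu_str_to_log_mask("in_asm,exec");

    if (mask) {
        cpu_set_log(mask);
    }
}
#endif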
1858
1859 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1860 {
1861 va_list ap;
1862 va_list ap2;
1863
1864 va_start(ap, fmt);
1865 va_copy(ap2, ap);
1866 fprintf(stderr, "qemu: fatal: ");
1867 vfprintf(stderr, fmt, ap);
1868 fprintf(stderr, "\n");
1869 #ifdef TARGET_I386
1870 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1871 #else
1872 cpu_dump_state(env, stderr, fprintf, 0);
1873 #endif
1874 if (qemu_log_enabled()) {
1875 qemu_log("qemu: fatal: ");
1876 qemu_log_vprintf(fmt, ap2);
1877 qemu_log("\n");
1878 #ifdef TARGET_I386
1879 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1880 #else
1881 log_cpu_state(env, 0);
1882 #endif
1883 qemu_log_flush();
1884 qemu_log_close();
1885 }
1886 va_end(ap2);
1887 va_end(ap);
1888 #if defined(CONFIG_USER_ONLY)
1889 {
1890 struct sigaction act;
1891 sigfillset(&act.sa_mask);
1892 act.sa_handler = SIG_DFL;
1893 sigaction(SIGABRT, &act, NULL);
1894 }
1895 #endif
1896 abort();
1897 }
1898
1899 CPUArchState *cpu_copy(CPUArchState *env)
1900 {
1901 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1902 CPUArchState *next_cpu = new_env->next_cpu;
1903 int cpu_index = new_env->cpu_index;
1904 #if defined(TARGET_HAS_ICE)
1905 CPUBreakpoint *bp;
1906 CPUWatchpoint *wp;
1907 #endif
1908
1909 memcpy(new_env, env, sizeof(CPUArchState));
1910
1911 /* Preserve chaining and index. */
1912 new_env->next_cpu = next_cpu;
1913 new_env->cpu_index = cpu_index;
1914
1915 /* Clone all break/watchpoints.
1916 Note: Once we support ptrace with hw-debug register access, make sure
1917 BP_CPU break/watchpoints are handled correctly on clone. */
1918 QTAILQ_INIT(&env->breakpoints);
1919 QTAILQ_INIT(&env->watchpoints);
1920 #if defined(TARGET_HAS_ICE)
1921 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1922 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1923 }
1924 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1925 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1926 wp->flags, NULL);
1927 }
1928 #endif
1929
1930 return new_env;
1931 }
1932
1933 #if !defined(CONFIG_USER_ONLY)
1934 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1935 {
1936 unsigned int i;
1937
1938 /* Discard jump cache entries for any tb which might potentially
1939 overlap the flushed page. */
1940 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1941 memset (&env->tb_jmp_cache[i], 0,
1942 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1943
1944 i = tb_jmp_cache_hash_page(addr);
1945 memset (&env->tb_jmp_cache[i], 0,
1946 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1947 }
1948
1949 /* Note: start and end must be within the same ram block. */
1950 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1951 int dirty_flags)
1952 {
1953 uintptr_t length, start1;
1954
1955 start &= TARGET_PAGE_MASK;
1956 end = TARGET_PAGE_ALIGN(end);
1957
1958 length = end - start;
1959 if (length == 0)
1960 return;
1961 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1962
1963 /* we modify the TLB cache so that the dirty bit will be set again
1964 when accessing the range */
1965 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1966 /* Check that we don't span multiple blocks - this breaks the
1967 address comparisons below. */
1968 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1969 != (end - 1) - start) {
1970 abort();
1971 }
1972 cpu_tlb_reset_dirty_all(start1, length);
1973 }
1974
1975 int cpu_physical_memory_set_dirty_tracking(int enable)
1976 {
1977 int ret = 0;
1978 in_migration = enable;
1979 return ret;
1980 }
1981
1982 target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
1983 MemoryRegionSection *section,
1984 target_ulong vaddr,
1985 target_phys_addr_t paddr,
1986 int prot,
1987 target_ulong *address)
1988 {
1989 target_phys_addr_t iotlb;
1990 CPUWatchpoint *wp;
1991
1992 if (memory_region_is_ram(section->mr)) {
1993 /* Normal RAM. */
1994 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1995 + memory_region_section_addr(section, paddr);
1996 if (!section->readonly) {
1997 iotlb |= phys_section_notdirty;
1998 } else {
1999 iotlb |= phys_section_rom;
2000 }
2001 } else {
2002 /* IO handlers are currently passed a physical address.
2003 It would be nice to pass an offset from the base address
2004 of that region. This would avoid having to special case RAM,
2005 and avoid full address decoding in every device.
2006 We can't use the high bits of pd for this because
2007 IO_MEM_ROMD uses these as a ram address. */
2008 iotlb = section - phys_sections;
2009 iotlb += memory_region_section_addr(section, paddr);
2010 }
2011
2012 /* Make accesses to pages with watchpoints go via the
2013 watchpoint trap routines. */
2014 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2015 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2016 /* Avoid trapping reads of pages with a write breakpoint. */
2017 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2018 iotlb = phys_section_watch + paddr;
2019 *address |= TLB_MMIO;
2020 break;
2021 }
2022 }
2023 }
2024
2025 return iotlb;
2026 }
2027
2028 #else
2029 /*
2030 * Walks guest process memory "regions" one by one
2031 * and calls callback function 'fn' for each region.
2032 */
2033
2034 struct walk_memory_regions_data
2035 {
2036 walk_memory_regions_fn fn;
2037 void *priv;
2038 uintptr_t start;
2039 int prot;
2040 };
2041
2042 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2043 abi_ulong end, int new_prot)
2044 {
2045 if (data->start != -1ul) {
2046 int rc = data->fn(data->priv, data->start, end, data->prot);
2047 if (rc != 0) {
2048 return rc;
2049 }
2050 }
2051
2052 data->start = (new_prot ? end : -1ul);
2053 data->prot = new_prot;
2054
2055 return 0;
2056 }
2057
2058 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2059 abi_ulong base, int level, void **lp)
2060 {
2061 abi_ulong pa;
2062 int i, rc;
2063
2064 if (*lp == NULL) {
2065 return walk_memory_regions_end(data, base, 0);
2066 }
2067
2068 if (level == 0) {
2069 PageDesc *pd = *lp;
2070 for (i = 0; i < L2_SIZE; ++i) {
2071 int prot = pd[i].flags;
2072
2073 pa = base | (i << TARGET_PAGE_BITS);
2074 if (prot != data->prot) {
2075 rc = walk_memory_regions_end(data, pa, prot);
2076 if (rc != 0) {
2077 return rc;
2078 }
2079 }
2080 }
2081 } else {
2082 void **pp = *lp;
2083 for (i = 0; i < L2_SIZE; ++i) {
2084 pa = base | ((abi_ulong)i <<
2085 (TARGET_PAGE_BITS + L2_BITS * level));
2086 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2087 if (rc != 0) {
2088 return rc;
2089 }
2090 }
2091 }
2092
2093 return 0;
2094 }
2095
2096 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2097 {
2098 struct walk_memory_regions_data data;
2099 uintptr_t i;
2100
2101 data.fn = fn;
2102 data.priv = priv;
2103 data.start = -1ul;
2104 data.prot = 0;
2105
2106 for (i = 0; i < V_L1_SIZE; i++) {
2107 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2108 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2109 if (rc != 0) {
2110 return rc;
2111 }
2112 }
2113
2114 return walk_memory_regions_end(&data, 0, 0);
2115 }
2116
2117 static int dump_region(void *priv, abi_ulong start,
2118 abi_ulong end, unsigned long prot)
2119 {
2120 FILE *f = (FILE *)priv;
2121
2122 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2123 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2124 start, end, end - start,
2125 ((prot & PAGE_READ) ? 'r' : '-'),
2126 ((prot & PAGE_WRITE) ? 'w' : '-'),
2127 ((prot & PAGE_EXEC) ? 'x' : '-'));
2128
2129 return (0);
2130 }
2131
2132 /* dump memory mappings */
2133 void page_dump(FILE *f)
2134 {
2135 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2136 "start", "end", "size", "prot");
2137 walk_memory_regions(f, dump_region);
2138 }
2139
2140 int page_get_flags(target_ulong address)
2141 {
2142 PageDesc *p;
2143
2144 p = page_find(address >> TARGET_PAGE_BITS);
2145 if (!p)
2146 return 0;
2147 return p->flags;
2148 }
2149
2150 /* Modify the flags of a page and invalidate the code if necessary.
2151 The flag PAGE_WRITE_ORG is positioned automatically depending
2152 on PAGE_WRITE. The mmap_lock should already be held. */
2153 void page_set_flags(target_ulong start, target_ulong end, int flags)
2154 {
2155 target_ulong addr, len;
2156
2157 /* This function should never be called with addresses outside the
2158 guest address space. If this assert fires, it probably indicates
2159 a missing call to h2g_valid. */
2160 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2161 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2162 #endif
2163 assert(start < end);
2164
2165 start = start & TARGET_PAGE_MASK;
2166 end = TARGET_PAGE_ALIGN(end);
2167
2168 if (flags & PAGE_WRITE) {
2169 flags |= PAGE_WRITE_ORG;
2170 }
2171
2172 for (addr = start, len = end - start;
2173 len != 0;
2174 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2175 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2176
2177 /* If the write protection bit is set, then we invalidate
2178 the code inside. */
2179 if (!(p->flags & PAGE_WRITE) &&
2180 (flags & PAGE_WRITE) &&
2181 p->first_tb) {
2182 tb_invalidate_phys_page(addr, 0, NULL);
2183 }
2184 p->flags = flags;
2185 }
2186 }
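
/* A minimal usage sketch for page_set_flags(), assuming a hypothetical
   loader that has just mapped a guest text segment of 'size' bytes at
   'start' (both values are illustrative). */
#if 0
mmap_lock();        /* page_set_flags() expects the mmap_lock held */
page_set_flags(start, start + size,
               PAGE_VALID | PAGE_READ | PAGE_EXEC);
mmap_unlock();
#endif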
2187
2188 int page_check_range(target_ulong start, target_ulong len, int flags)
2189 {
2190 PageDesc *p;
2191 target_ulong end;
2192 target_ulong addr;
2193
2194 /* This function should never be called with addresses outside the
2195 guest address space. If this assert fires, it probably indicates
2196 a missing call to h2g_valid. */
2197 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2198 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2199 #endif
2200
2201 if (len == 0) {
2202 return 0;
2203 }
2204 if (start + len - 1 < start) {
2205 /* We've wrapped around. */
2206 return -1;
2207 }
2208
2209 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2210 start = start & TARGET_PAGE_MASK;
2211
2212 for (addr = start, len = end - start;
2213 len != 0;
2214 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2215 p = page_find(addr >> TARGET_PAGE_BITS);
2216 if (!p)
2217 return -1;
2218 if (!(p->flags & PAGE_VALID))
2219 return -1;
2220
2221 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2222 return -1;
2223 if (flags & PAGE_WRITE) {
2224 if (!(p->flags & PAGE_WRITE_ORG))
2225 return -1;
2226 /* unprotect the page if it was put read-only because it
2227 contains translated code */
2228 if (!(p->flags & PAGE_WRITE)) {
2229 if (!page_unprotect(addr, 0, NULL))
2230 return -1;
2231 }
2232 return 0;
2233 }
2234 }
2235 return 0;
2236 }
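
/* A minimal usage sketch for page_check_range(), in the style of a
   syscall-emulation path validating a guest buffer before reading it.
   'guest_addr', 'guest_len' and the error value are illustrative. */
#if 0
if (page_check_range(guest_addr, guest_len, PAGE_READ) < 0) {
    return -EFAULT;     /* the buffer is not readable by the guest */
}
#endif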
2237
2238 /* called from signal handler: invalidate the code and unprotect the
2239 page. Return TRUE if the fault was successfully handled. */
2240 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2241 {
2242 unsigned int prot;
2243 PageDesc *p;
2244 target_ulong host_start, host_end, addr;
2245
2246 /* Technically this isn't safe inside a signal handler. However we
2247 know this only ever happens in a synchronous SEGV handler, so in
2248 practice it seems to be ok. */
2249 mmap_lock();
2250
2251 p = page_find(address >> TARGET_PAGE_BITS);
2252 if (!p) {
2253 mmap_unlock();
2254 return 0;
2255 }
2256
2257 /* if the page was really writable, then we change its
2258 protection back to writable */
2259 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2260 host_start = address & qemu_host_page_mask;
2261 host_end = host_start + qemu_host_page_size;
2262
2263 prot = 0;
2264 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2265 p = page_find(addr >> TARGET_PAGE_BITS);
2266 p->flags |= PAGE_WRITE;
2267 prot |= p->flags;
2268
2269 /* and since the content will be modified, we must invalidate
2270 the corresponding translated code. */
2271 tb_invalidate_phys_page(addr, pc, puc);
2272 #ifdef DEBUG_TB_CHECK
2273 tb_invalidate_check(addr);
2274 #endif
2275 }
2276 mprotect((void *)g2h(host_start), qemu_host_page_size,
2277 prot & PAGE_BITS);
2278
2279 mmap_unlock();
2280 return 1;
2281 }
2282 mmap_unlock();
2283 return 0;
2284 }
2285 #endif /* defined(CONFIG_USER_ONLY) */
2286
2287 #if !defined(CONFIG_USER_ONLY)
2288
2289 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2290 typedef struct subpage_t {
2291 MemoryRegion iomem;
2292 target_phys_addr_t base;
2293 uint16_t sub_section[TARGET_PAGE_SIZE];
2294 } subpage_t;
2295
2296 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2297 uint16_t section);
2298 static subpage_t *subpage_init(target_phys_addr_t base);
2299 static void destroy_page_desc(uint16_t section_index)
2300 {
2301 MemoryRegionSection *section = &phys_sections[section_index];
2302 MemoryRegion *mr = section->mr;
2303
2304 if (mr->subpage) {
2305 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2306 memory_region_destroy(&subpage->iomem);
2307 g_free(subpage);
2308 }
2309 }
2310
2311 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2312 {
2313 unsigned i;
2314 PhysPageEntry *p;
2315
2316 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2317 return;
2318 }
2319
2320 p = phys_map_nodes[lp->ptr];
2321 for (i = 0; i < L2_SIZE; ++i) {
2322 if (!p[i].is_leaf) {
2323 destroy_l2_mapping(&p[i], level - 1);
2324 } else {
2325 destroy_page_desc(p[i].ptr);
2326 }
2327 }
2328 lp->is_leaf = 0;
2329 lp->ptr = PHYS_MAP_NODE_NIL;
2330 }
2331
2332 static void destroy_all_mappings(void)
2333 {
2334 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2335 phys_map_nodes_reset();
2336 }
2337
2338 static uint16_t phys_section_add(MemoryRegionSection *section)
2339 {
2340 if (phys_sections_nb == phys_sections_nb_alloc) {
2341 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2342 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2343 phys_sections_nb_alloc);
2344 }
2345 phys_sections[phys_sections_nb] = *section;
2346 return phys_sections_nb++;
2347 }
2348
2349 static void phys_sections_clear(void)
2350 {
2351 phys_sections_nb = 0;
2352 }
2353
2354 /* Register a MemoryRegionSection in the physical page table.
2355 Sections that do not cover whole target pages are handled by
2356 register_subpage(): the enclosing page gets (or reuses) a subpage_t
2357 whose sub_section[] table maps each byte offset within the page
2358 to the right section. Page-aligned ranges whose size is a multiple
2359 of the page size go through register_multipage(), which points the
2360 page table entries at the section directly; the splitting is done
2361 by cpu_register_physical_memory_log() below. */
2362 static void register_subpage(MemoryRegionSection *section)
2363 {
2364 subpage_t *subpage;
2365 target_phys_addr_t base = section->offset_within_address_space
2366 & TARGET_PAGE_MASK;
2367 MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
2368 MemoryRegionSection subsection = {
2369 .offset_within_address_space = base,
2370 .size = TARGET_PAGE_SIZE,
2371 };
2372 target_phys_addr_t start, end;
2373
2374 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2375
2376 if (!(existing->mr->subpage)) {
2377 subpage = subpage_init(base);
2378 subsection.mr = &subpage->iomem;
2379 phys_page_set(base >> TARGET_PAGE_BITS, 1,
2380 phys_section_add(&subsection));
2381 } else {
2382 subpage = container_of(existing->mr, subpage_t, iomem);
2383 }
2384 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2385 end = start + section->size;
2386 subpage_register(subpage, start, end, phys_section_add(section));
2387 }
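
/* Worked example for the subpage path (hypothetical layout): if a
   0x100-byte device region is mapped at offset 0x800 of an otherwise
   unassigned page, a single subpage_t covers that page; the
   sub_section[] entries for the device's 0x100 bytes resolve to the
   device's section and the rest keep phys_section_unassigned. */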
2388
2389
2390 static void register_multipage(MemoryRegionSection *section)
2391 {
2392 target_phys_addr_t start_addr = section->offset_within_address_space;
2393 ram_addr_t size = section->size;
2394 target_phys_addr_t addr;
2395 uint16_t section_index = phys_section_add(section);
2396
2397 assert(size);
2398
2399 addr = start_addr;
2400 phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2401 section_index);
2402 }
2403
2404 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2405 bool readonly)
2406 {
2407 MemoryRegionSection now = *section, remain = *section;
2408
2409 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2410 || (now.size < TARGET_PAGE_SIZE)) {
2411 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2412 - now.offset_within_address_space,
2413 now.size);
2414 register_subpage(&now);
2415 remain.size -= now.size;
2416 remain.offset_within_address_space += now.size;
2417 remain.offset_within_region += now.size;
2418 }
2419 now = remain;
2420 now.size &= TARGET_PAGE_MASK;
2421 if (now.size) {
2422 register_multipage(&now);
2423 remain.size -= now.size;
2424 remain.offset_within_address_space += now.size;
2425 remain.offset_within_region += now.size;
2426 }
2427 now = remain;
2428 if (now.size) {
2429 register_subpage(&now);
2430 }
2431 }
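
/* Worked example (hypothetical addresses, 4 KiB target pages): a
   section covering guest-physical [0x1800, 0x5400) is registered in
   three pieces: a leading subpage [0x1800, 0x2000), whole pages
   [0x2000, 0x5000) via register_multipage(), and a trailing subpage
   [0x5000, 0x5400). */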
2432
2433
2434 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2435 {
2436 if (kvm_enabled())
2437 kvm_coalesce_mmio_region(addr, size);
2438 }
2439
2440 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2441 {
2442 if (kvm_enabled())
2443 kvm_uncoalesce_mmio_region(addr, size);
2444 }
2445
2446 void qemu_flush_coalesced_mmio_buffer(void)
2447 {
2448 if (kvm_enabled())
2449 kvm_flush_coalesced_mmio_buffer();
2450 }
2451
2452 #if defined(__linux__) && !defined(TARGET_S390X)
2453
2454 #include <sys/vfs.h>
2455
2456 #define HUGETLBFS_MAGIC 0x958458f6
2457
2458 static long gethugepagesize(const char *path)
2459 {
2460 struct statfs fs;
2461 int ret;
2462
2463 do {
2464 ret = statfs(path, &fs);
2465 } while (ret != 0 && errno == EINTR);
2466
2467 if (ret != 0) {
2468 perror(path);
2469 return 0;
2470 }
2471
2472 if (fs.f_type != HUGETLBFS_MAGIC)
2473 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2474
2475 return fs.f_bsize;
2476 }
2477
2478 static void *file_ram_alloc(RAMBlock *block,
2479 ram_addr_t memory,
2480 const char *path)
2481 {
2482 char *filename;
2483 void *area;
2484 int fd;
2485 #ifdef MAP_POPULATE
2486 int flags;
2487 #endif
2488 unsigned long hpagesize;
2489
2490 hpagesize = gethugepagesize(path);
2491 if (!hpagesize) {
2492 return NULL;
2493 }
2494
2495 if (memory < hpagesize) {
2496 return NULL;
2497 }
2498
2499 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2500 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2501 return NULL;
2502 }
2503
2504 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2505 return NULL;
2506 }
2507
2508 fd = mkstemp(filename);
2509 if (fd < 0) {
2510 perror("unable to create backing store for hugepages");
2511 free(filename);
2512 return NULL;
2513 }
2514 unlink(filename);
2515 free(filename);
2516
2517 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2518
2519 /*
2520 * ftruncate is not supported by hugetlbfs in older
2521 * hosts, so don't bother bailing out on errors.
2522 * If anything goes wrong with it under other filesystems,
2523 * mmap will fail.
2524 */
2525 if (ftruncate(fd, memory))
2526 perror("ftruncate");
2527
2528 #ifdef MAP_POPULATE
2529 /* NB: MAP_POPULATE does not reliably pre-allocate every physical page
2530 * when MAP_PRIVATE is requested, so for mem_prealloc we mmap with
2531 * MAP_SHARED to sidestep this quirk.
2532 */
2533 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2534 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2535 #else
2536 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2537 #endif
2538 if (area == MAP_FAILED) {
2539 perror("file_ram_alloc: can't mmap RAM pages");
2540 close(fd);
2541 return (NULL);
2542 }
2543 block->fd = fd;
2544 return area;
2545 }
2546 #endif
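
/* The hugetlbfs path above is reached when QEMU is started with
   "-mem-path <hugetlbfs mount>" (optionally together with
   "-mem-prealloc"); gethugepagesize() then reports that mount's block
   size, e.g. 2 MiB for typical x86-64 huge pages. */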
2547
2548 static ram_addr_t find_ram_offset(ram_addr_t size)
2549 {
2550 RAMBlock *block, *next_block;
2551 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2552
2553 if (QLIST_EMPTY(&ram_list.blocks))
2554 return 0;
2555
2556 QLIST_FOREACH(block, &ram_list.blocks, next) {
2557 ram_addr_t end, next = RAM_ADDR_MAX;
2558
2559 end = block->offset + block->length;
2560
2561 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2562 if (next_block->offset >= end) {
2563 next = MIN(next, next_block->offset);
2564 }
2565 }
2566 if (next - end >= size && next - end < mingap) {
2567 offset = end;
2568 mingap = next - end;
2569 }
2570 }
2571
2572 if (offset == RAM_ADDR_MAX) {
2573 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2574 (uint64_t)size);
2575 abort();
2576 }
2577
2578 return offset;
2579 }
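
/* Worked example (hypothetical offsets): with existing blocks covering
   [0x0, 0x1000) and [0x3000, 0x4000), find_ram_offset(0x1000) returns
   0x1000, the end of the first block, because the 0x2000-byte gap
   after it is the smallest one that still fits the request. */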
2580
2581 static ram_addr_t last_ram_offset(void)
2582 {
2583 RAMBlock *block;
2584 ram_addr_t last = 0;
2585
2586 QLIST_FOREACH(block, &ram_list.blocks, next)
2587 last = MAX(last, block->offset + block->length);
2588
2589 return last;
2590 }
2591
2592 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2593 {
2594 RAMBlock *new_block, *block;
2595
2596 new_block = NULL;
2597 QLIST_FOREACH(block, &ram_list.blocks, next) {
2598 if (block->offset == addr) {
2599 new_block = block;
2600 break;
2601 }
2602 }
2603 assert(new_block);
2604 assert(!new_block->idstr[0]);
2605
2606 if (dev) {
2607 char *id = qdev_get_dev_path(dev);
2608 if (id) {
2609 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2610 g_free(id);
2611 }
2612 }
2613 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2614
2615 QLIST_FOREACH(block, &ram_list.blocks, next) {
2616 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2617 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2618 new_block->idstr);
2619 abort();
2620 }
2621 }
2622 }
2623
2624 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2625 MemoryRegion *mr)
2626 {
2627 RAMBlock *new_block;
2628
2629 size = TARGET_PAGE_ALIGN(size);
2630 new_block = g_malloc0(sizeof(*new_block));
2631
2632 new_block->mr = mr;
2633 new_block->offset = find_ram_offset(size);
2634 if (host) {
2635 new_block->host = host;
2636 new_block->flags |= RAM_PREALLOC_MASK;
2637 } else {
2638 if (mem_path) {
2639 #if defined (__linux__) && !defined(TARGET_S390X)
2640 new_block->host = file_ram_alloc(new_block, size, mem_path);
2641 if (!new_block->host) {
2642 new_block->host = qemu_vmalloc(size);
2643 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2644 }
2645 #else
2646 fprintf(stderr, "-mem-path option unsupported\n");
2647 exit(1);
2648 #endif
2649 } else {
2650 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2651 /* S390 KVM requires the topmost vma of the RAM to be below a
2652 system-defined value, which is at least 256GB; larger systems
2653 have larger values. We place the guest between the end of the
2654 data segment (system break) and this value, using 32GB as a base
2655 so the system break has enough room to grow. */
2656 new_block->host = mmap((void*)0x800000000, size,
2657 PROT_EXEC|PROT_READ|PROT_WRITE,
2658 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2659 if (new_block->host == MAP_FAILED) {
2660 fprintf(stderr, "Allocating RAM failed\n");
2661 abort();
2662 }
2663 #else
2664 if (xen_enabled()) {
2665 xen_ram_alloc(new_block->offset, size, mr);
2666 } else {
2667 new_block->host = qemu_vmalloc(size);
2668 }
2669 #endif
2670 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2671 }
2672 }
2673 new_block->length = size;
2674
2675 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2676
2677 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2678 last_ram_offset() >> TARGET_PAGE_BITS);
2679 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2680 0xff, size >> TARGET_PAGE_BITS);
2681
2682 if (kvm_enabled())
2683 kvm_setup_guest_memory(new_block->host, size);
2684
2685 return new_block->offset;
2686 }
2687
2688 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2689 {
2690 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2691 }
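
/* A minimal usage sketch: callers normally reach qemu_ram_alloc()
   indirectly through memory_region_init_ram(); a direct call would
   look like the following, with 'dev_ram' standing for a hypothetical
   MemoryRegion owned by the caller. */
#if 0
ram_addr_t ram_offset = qemu_ram_alloc(0x100000, &dev_ram);
void *host = qemu_get_ram_ptr(ram_offset);  /* host view of the block */
#endif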
2692
2693 void qemu_ram_free_from_ptr(ram_addr_t addr)
2694 {
2695 RAMBlock *block;
2696
2697 QLIST_FOREACH(block, &ram_list.blocks, next) {
2698 if (addr == block->offset) {
2699 QLIST_REMOVE(block, next);
2700 g_free(block);
2701 return;
2702 }
2703 }
2704 }
2705
2706 void qemu_ram_free(ram_addr_t addr)
2707 {
2708 RAMBlock *block;
2709
2710 QLIST_FOREACH(block, &ram_list.blocks, next) {
2711 if (addr == block->offset) {
2712 QLIST_REMOVE(block, next);
2713 if (block->flags & RAM_PREALLOC_MASK) {
2714 ;
2715 } else if (mem_path) {
2716 #if defined (__linux__) && !defined(TARGET_S390X)
2717 if (block->fd) {
2718 munmap(block->host, block->length);
2719 close(block->fd);
2720 } else {
2721 qemu_vfree(block->host);
2722 }
2723 #else
2724 abort();
2725 #endif
2726 } else {
2727 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2728 munmap(block->host, block->length);
2729 #else
2730 if (xen_enabled()) {
2731 xen_invalidate_map_cache_entry(block->host);
2732 } else {
2733 qemu_vfree(block->host);
2734 }
2735 #endif
2736 }
2737 g_free(block);
2738 return;
2739 }
2740 }
2741
2742 }
2743
2744 #ifndef _WIN32
2745 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2746 {
2747 RAMBlock *block;
2748 ram_addr_t offset;
2749 int flags;
2750 void *area, *vaddr;
2751
2752 QLIST_FOREACH(block, &ram_list.blocks, next) {
2753 offset = addr - block->offset;
2754 if (offset < block->length) {
2755 vaddr = block->host + offset;
2756 if (block->flags & RAM_PREALLOC_MASK) {
2757 ;
2758 } else {
2759 flags = MAP_FIXED;
2760 munmap(vaddr, length);
2761 if (mem_path) {
2762 #if defined(__linux__) && !defined(TARGET_S390X)
2763 if (block->fd) {
2764 #ifdef MAP_POPULATE
2765 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2766 MAP_PRIVATE;
2767 #else
2768 flags |= MAP_PRIVATE;
2769 #endif
2770 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2771 flags, block->fd, offset);
2772 } else {
2773 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2774 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2775 flags, -1, 0);
2776 }
2777 #else
2778 abort();
2779 #endif
2780 } else {
2781 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2782 flags |= MAP_SHARED | MAP_ANONYMOUS;
2783 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2784 flags, -1, 0);
2785 #else
2786 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2787 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2788 flags, -1, 0);
2789 #endif
2790 }
2791 if (area != vaddr) {
2792 fprintf(stderr, "Could not remap addr: "
2793 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2794 length, addr);
2795 exit(1);
2796 }
2797 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
2798 }
2799 return;
2800 }
2801 }
2802 }
2803 #endif /* !_WIN32 */
2804
2805 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2806 With the exception of the softmmu code in this file, this should
2807 only be used for local memory (e.g. video ram) that the device owns,
2808 and knows it isn't going to access beyond the end of the block.
2809
2810 It should not be used for general purpose DMA.
2811 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2812 */
2813 void *qemu_get_ram_ptr(ram_addr_t addr)
2814 {
2815 RAMBlock *block;
2816
2817 QLIST_FOREACH(block, &ram_list.blocks, next) {
2818 if (addr - block->offset < block->length) {
2819 /* Move this entry to the start of the list. */
2820 if (block != QLIST_FIRST(&ram_list.blocks)) {
2821 QLIST_REMOVE(block, next);
2822 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2823 }
2824 if (xen_enabled()) {
2825 /* We need to check if the requested address is in the RAM
2826 * because we don't want to map the entire memory in QEMU.
2827 * In that case just map until the end of the page.
2828 */
2829 if (block->offset == 0) {
2830 return xen_map_cache(addr, 0, 0);
2831 } else if (block->host == NULL) {
2832 block->host =
2833 xen_map_cache(block->offset, block->length, 1);
2834 }
2835 }
2836 return block->host + (addr - block->offset);
2837 }
2838 }
2839
2840 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2841 abort();
2842
2843 return NULL;
2844 }
2845
2846 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2847 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2848 */
2849 void *qemu_safe_ram_ptr(ram_addr_t addr)
2850 {
2851 RAMBlock *block;
2852
2853 QLIST_FOREACH(block, &ram_list.blocks, next) {
2854 if (addr - block->offset < block->length) {
2855 if (xen_enabled()) {
2856 /* We need to check if the requested address is in the RAM
2857 * because we don't want to map the entire memory in QEMU.
2858 * In that case just map until the end of the page.
2859 */
2860 if (block->offset == 0) {
2861 return xen_map_cache(addr, 0, 0);
2862 } else if (block->host == NULL) {
2863 block->host =
2864 xen_map_cache(block->offset, block->length, 1);
2865 }
2866 }
2867 return block->host + (addr - block->offset);
2868 }
2869 }
2870
2871 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2872 abort();
2873
2874 return NULL;
2875 }
2876
2877 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2878 * but takes a size argument */
2879 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2880 {
2881 if (*size == 0) {
2882 return NULL;
2883 }
2884 if (xen_enabled()) {
2885 return xen_map_cache(addr, *size, 1);
2886 } else {
2887 RAMBlock *block;
2888
2889 QLIST_FOREACH(block, &ram_list.blocks, next) {
2890 if (addr - block->offset < block->length) {
2891 if (addr - block->offset + *size > block->length)
2892 *size = block->length - addr + block->offset;
2893 return block->host + (addr - block->offset);
2894 }
2895 }
2896
2897 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2898 abort();
2899 }
2900 }
2901
2902 void qemu_put_ram_ptr(void *addr)
2903 {
2904 trace_qemu_put_ram_ptr(addr);
2905 }
2906
2907 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2908 {
2909 RAMBlock *block;
2910 uint8_t *host = ptr;
2911
2912 if (xen_enabled()) {
2913 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2914 return 0;
2915 }
2916
2917 QLIST_FOREACH(block, &ram_list.blocks, next) {
2918 /* This case appears when the block is not mapped. */
2919 if (block->host == NULL) {
2920 continue;
2921 }
2922 if (host - block->host < block->length) {
2923 *ram_addr = block->offset + (host - block->host);
2924 return 0;
2925 }
2926 }
2927
2928 return -1;
2929 }
2930
2931 /* Some of the softmmu routines need to translate from a host pointer
2932 (typically a TLB entry) back to a ram offset. */
2933 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2934 {
2935 ram_addr_t ram_addr;
2936
2937 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2938 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2939 abort();
2940 }
2941 return ram_addr;
2942 }
2943
2944 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
2945 unsigned size)
2946 {
2947 #ifdef DEBUG_UNASSIGNED
2948 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2949 #endif
2950 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2951 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2952 #endif
2953 return 0;
2954 }
2955
2956 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
2957 uint64_t val, unsigned size)
2958 {
2959 #ifdef DEBUG_UNASSIGNED
2960 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2961 #endif
2962 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2963 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2964 #endif
2965 }
2966
2967 static const MemoryRegionOps unassigned_mem_ops = {
2968 .read = unassigned_mem_read,
2969 .write = unassigned_mem_write,
2970 .endianness = DEVICE_NATIVE_ENDIAN,
2971 };
2972
2973 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
2974 unsigned size)
2975 {
2976 abort();
2977 }
2978
2979 static void error_mem_write(void *opaque, target_phys_addr_t addr,
2980 uint64_t value, unsigned size)
2981 {
2982 abort();
2983 }
2984
2985 static const MemoryRegionOps error_mem_ops = {
2986 .read = error_mem_read,
2987 .write = error_mem_write,
2988 .endianness = DEVICE_NATIVE_ENDIAN,
2989 };
2990
2991 static const MemoryRegionOps rom_mem_ops = {
2992 .read = error_mem_read,
2993 .write = unassigned_mem_write,
2994 .endianness = DEVICE_NATIVE_ENDIAN,
2995 };
2996
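/* Backing store for io_mem_notdirty: the TLB routes write accesses here
   while a RAM page is not yet marked fully dirty, so any translated
   code covering the page can be invalidated before the guest's write
   lands.  Once all dirty flags are set again, tlb_set_dirty() restores
   a plain RAM mapping for the address. */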
2997 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
2998 uint64_t val, unsigned size)
2999 {
3000 int dirty_flags;
3001 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3002 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3003 #if !defined(CONFIG_USER_ONLY)
3004 tb_invalidate_phys_page_fast(ram_addr, size);
3005 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3006 #endif
3007 }
3008 switch (size) {
3009 case 1:
3010 stb_p(qemu_get_ram_ptr(ram_addr), val);
3011 break;
3012 case 2:
3013 stw_p(qemu_get_ram_ptr(ram_addr), val);
3014 break;
3015 case 4:
3016 stl_p(qemu_get_ram_ptr(ram_addr), val);
3017 break;
3018 default:
3019 abort();
3020 }
3021 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3022 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3023 /* we remove the notdirty callback only if the code has been
3024 flushed */
3025 if (dirty_flags == 0xff)
3026 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3027 }
3028
3029 static const MemoryRegionOps notdirty_mem_ops = {
3030 .read = error_mem_read,
3031 .write = notdirty_mem_write,
3032 .endianness = DEVICE_NATIVE_ENDIAN,
3033 };
3034
3035 /* Generate a debug exception if a watchpoint has been hit. */
3036 static void check_watchpoint(int offset, int len_mask, int flags)
3037 {
3038 CPUArchState *env = cpu_single_env;
3039 target_ulong pc, cs_base;
3040 TranslationBlock *tb;
3041 target_ulong vaddr;
3042 CPUWatchpoint *wp;
3043 int cpu_flags;
3044
3045 if (env->watchpoint_hit) {
3046 /* We re-entered the check after replacing the TB. Now raise
3047 * the debug interrupt so that it will trigger after the
3048 * current instruction. */
3049 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3050 return;
3051 }
3052 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3053 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3054 if ((vaddr == (wp->vaddr & len_mask) ||
3055 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3056 wp->flags |= BP_WATCHPOINT_HIT;
3057 if (!env->watchpoint_hit) {
3058 env->watchpoint_hit = wp;
3059 tb = tb_find_pc(env->mem_io_pc);
3060 if (!tb) {
3061 cpu_abort(env, "check_watchpoint: could not find TB for "
3062 "pc=%p", (void *)env->mem_io_pc);
3063 }
3064 cpu_restore_state(tb, env, env->mem_io_pc);
3065 tb_phys_invalidate(tb, -1);
3066 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3067 env->exception_index = EXCP_DEBUG;
3068 cpu_loop_exit(env);
3069 } else {
3070 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3071 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3072 cpu_resume_from_signal(env, NULL);
3073 }
3074 }
3075 } else {
3076 wp->flags &= ~BP_WATCHPOINT_HIT;
3077 }
3078 }
3079 }
3080
3081 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3082 so these check for a hit then pass through to the normal out-of-line
3083 phys routines. */
3084 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
3085 unsigned size)
3086 {
3087 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3088 switch (size) {
3089 case 1: return ldub_phys(addr);
3090 case 2: return lduw_phys(addr);
3091 case 4: return ldl_phys(addr);
3092 default: abort();
3093 }
3094 }
3095
3096 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
3097 uint64_t val, unsigned size)
3098 {
3099 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3100 switch (size) {
3101 case 1:
3102 stb_phys(addr, val);
3103 break;
3104 case 2:
3105 stw_phys(addr, val);
3106 break;
3107 case 4:
3108 stl_phys(addr, val);
3109 break;
3110 default: abort();
3111 }
3112 }
3113
3114 static const MemoryRegionOps watch_mem_ops = {
3115 .read = watch_mem_read,
3116 .write = watch_mem_write,
3117 .endianness = DEVICE_NATIVE_ENDIAN,
3118 };
3119
3120 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3121 unsigned len)
3122 {
3123 subpage_t *mmio = opaque;
3124 unsigned int idx = SUBPAGE_IDX(addr);
3125 MemoryRegionSection *section;
3126 #if defined(DEBUG_SUBPAGE)
3127 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3128 mmio, len, addr, idx);
3129 #endif
3130
3131 section = &phys_sections[mmio->sub_section[idx]];
3132 addr += mmio->base;
3133 addr -= section->offset_within_address_space;
3134 addr += section->offset_within_region;
3135 return io_mem_read(section->mr, addr, len);
3136 }
3137
3138 static void subpage_write(void *opaque, target_phys_addr_t addr,
3139 uint64_t value, unsigned len)
3140 {
3141 subpage_t *mmio = opaque;
3142 unsigned int idx = SUBPAGE_IDX(addr);
3143 MemoryRegionSection *section;
3144 #if defined(DEBUG_SUBPAGE)
3145 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3146 " idx %d value %"PRIx64"\n",
3147 __func__, mmio, len, addr, idx, value);
3148 #endif
3149
3150 section = &phys_sections[mmio->sub_section[idx]];
3151 addr += mmio->base;
3152 addr -= section->offset_within_address_space;
3153 addr += section->offset_within_region;
3154 io_mem_write(section->mr, addr, value, len);
3155 }
3156
3157 static const MemoryRegionOps subpage_ops = {
3158 .read = subpage_read,
3159 .write = subpage_write,
3160 .endianness = DEVICE_NATIVE_ENDIAN,
3161 };
3162
3163 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3164 unsigned size)
3165 {
3166 ram_addr_t raddr = addr;
3167 void *ptr = qemu_get_ram_ptr(raddr);
3168 switch (size) {
3169 case 1: return ldub_p(ptr);
3170 case 2: return lduw_p(ptr);
3171 case 4: return ldl_p(ptr);
3172 default: abort();
3173 }
3174 }
3175
3176 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3177 uint64_t value, unsigned size)
3178 {
3179 ram_addr_t raddr = addr;
3180 void *ptr = qemu_get_ram_ptr(raddr);
3181 switch (size) {
3182 case 1: return stb_p(ptr, value);
3183 case 2: return stw_p(ptr, value);
3184 case 4: return stl_p(ptr, value);
3185 default: abort();
3186 }
3187 }
3188
3189 static const MemoryRegionOps subpage_ram_ops = {
3190 .read = subpage_ram_read,
3191 .write = subpage_ram_write,
3192 .endianness = DEVICE_NATIVE_ENDIAN,
3193 };
3194
3195 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3196 uint16_t section)
3197 {
3198 int idx, eidx;
3199
3200 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3201 return -1;
3202 idx = SUBPAGE_IDX(start);
3203 eidx = SUBPAGE_IDX(end);
3204 #if defined(DEBUG_SUBPAGE)
3205 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
3206 __func__, mmio, start, end, idx, eidx, section);
3207 #endif
3208 if (memory_region_is_ram(phys_sections[section].mr)) {
3209 MemoryRegionSection new_section = phys_sections[section];
3210 new_section.mr = &io_mem_subpage_ram;
3211 section = phys_section_add(&new_section);
3212 }
3213 for (; idx <= eidx; idx++) {
3214 mmio->sub_section[idx] = section;
3215 }
3216
3217 return 0;
3218 }
3219
3220 static subpage_t *subpage_init(target_phys_addr_t base)
3221 {
3222 subpage_t *mmio;
3223
3224 mmio = g_malloc0(sizeof(subpage_t));
3225
3226 mmio->base = base;
3227 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3228 "subpage", TARGET_PAGE_SIZE);
3229 mmio->iomem.subpage = true;
3230 #if defined(DEBUG_SUBPAGE)
3231 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
3232 mmio, base, TARGET_PAGE_SIZE);
3233 #endif
3234 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3235
3236 return mmio;
3237 }
3238
3239 static uint16_t dummy_section(MemoryRegion *mr)
3240 {
3241 MemoryRegionSection section = {
3242 .mr = mr,
3243 .offset_within_address_space = 0,
3244 .offset_within_region = 0,
3245 .size = UINT64_MAX,
3246 };
3247
3248 return phys_section_add(&section);
3249 }
3250
3251 MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3252 {
3253 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3254 }
3255
3256 static void io_mem_init(void)
3257 {
3258 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3259 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3260 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3261 "unassigned", UINT64_MAX);
3262 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3263 "notdirty", UINT64_MAX);
3264 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3265 "subpage-ram", UINT64_MAX);
3266 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3267 "watch", UINT64_MAX);
3268 }
3269
3270 static void core_begin(MemoryListener *listener)
3271 {
3272 destroy_all_mappings();
3273 phys_sections_clear();
3274 phys_map.ptr = PHYS_MAP_NODE_NIL;
3275 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3276 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3277 phys_section_rom = dummy_section(&io_mem_rom);
3278 phys_section_watch = dummy_section(&io_mem_watch);
3279 }
3280
3281 static void core_commit(MemoryListener *listener)
3282 {
3283 CPUArchState *env;
3284
3285 /* since each CPU stores ram addresses in its TLB cache, we must
3286 reset the modified entries */
3287 /* XXX: slow ! */
3288 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3289 tlb_flush(env, 1);
3290 }
3291 }
3292
3293 static void core_region_add(MemoryListener *listener,
3294 MemoryRegionSection *section)
3295 {
3296 cpu_register_physical_memory_log(section, section->readonly);
3297 }
3298
3299 static void core_region_del(MemoryListener *listener,
3300 MemoryRegionSection *section)
3301 {
3302 }
3303
3304 static void core_region_nop(MemoryListener *listener,
3305 MemoryRegionSection *section)
3306 {
3307 cpu_register_physical_memory_log(section, section->readonly);
3308 }
3309
3310 static void core_log_start(MemoryListener *listener,
3311 MemoryRegionSection *section)
3312 {
3313 }
3314
3315 static void core_log_stop(MemoryListener *listener,
3316 MemoryRegionSection *section)
3317 {
3318 }
3319
3320 static void core_log_sync(MemoryListener *listener,
3321 MemoryRegionSection *section)
3322 {
3323 }
3324
3325 static void core_log_global_start(MemoryListener *listener)
3326 {
3327 cpu_physical_memory_set_dirty_tracking(1);
3328 }
3329
3330 static void core_log_global_stop(MemoryListener *listener)
3331 {
3332 cpu_physical_memory_set_dirty_tracking(0);
3333 }
3334
3335 static void core_eventfd_add(MemoryListener *listener,
3336 MemoryRegionSection *section,
3337 bool match_data, uint64_t data, int fd)
3338 {
3339 }
3340
3341 static void core_eventfd_del(MemoryListener *listener,
3342 MemoryRegionSection *section,
3343 bool match_data, uint64_t data, int fd)
3344 {
3345 }
3346
3347 static void io_begin(MemoryListener *listener)
3348 {
3349 }
3350
3351 static void io_commit(MemoryListener *listener)
3352 {
3353 }
3354
3355 static void io_region_add(MemoryListener *listener,
3356 MemoryRegionSection *section)
3357 {
3358 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3359
3360 mrio->mr = section->mr;
3361 mrio->offset = section->offset_within_region;
3362 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3363 section->offset_within_address_space, section->size);
3364 ioport_register(&mrio->iorange);
3365 }
3366
3367 static void io_region_del(MemoryListener *listener,
3368 MemoryRegionSection *section)
3369 {
3370 isa_unassign_ioport(section->offset_within_address_space, section->size);
3371 }
3372
3373 static void io_region_nop(MemoryListener *listener,
3374 MemoryRegionSection *section)
3375 {
3376 }
3377
3378 static void io_log_start(MemoryListener *listener,
3379 MemoryRegionSection *section)
3380 {
3381 }
3382
3383 static void io_log_stop(MemoryListener *listener,
3384 MemoryRegionSection *section)
3385 {
3386 }
3387
3388 static void io_log_sync(MemoryListener *listener,
3389 MemoryRegionSection *section)
3390 {
3391 }
3392
3393 static void io_log_global_start(MemoryListener *listener)
3394 {
3395 }
3396
3397 static void io_log_global_stop(MemoryListener *listener)
3398 {
3399 }
3400
3401 static void io_eventfd_add(MemoryListener *listener,
3402 MemoryRegionSection *section,
3403 bool match_data, uint64_t data, int fd)
3404 {
3405 }
3406
3407 static void io_eventfd_del(MemoryListener *listener,
3408 MemoryRegionSection *section,
3409 bool match_data, uint64_t data, int fd)
3410 {
3411 }
3412
3413 static MemoryListener core_memory_listener = {
3414 .begin = core_begin,
3415 .commit = core_commit,
3416 .region_add = core_region_add,
3417 .region_del = core_region_del,
3418 .region_nop = core_region_nop,
3419 .log_start = core_log_start,
3420 .log_stop = core_log_stop,
3421 .log_sync = core_log_sync,
3422 .log_global_start = core_log_global_start,
3423 .log_global_stop = core_log_global_stop,
3424 .eventfd_add = core_eventfd_add,
3425 .eventfd_del = core_eventfd_del,
3426 .priority = 0,
3427 };
3428
3429 static MemoryListener io_memory_listener = {
3430 .begin = io_begin,
3431 .commit = io_commit,
3432 .region_add = io_region_add,
3433 .region_del = io_region_del,
3434 .region_nop = io_region_nop,
3435 .log_start = io_log_start,
3436 .log_stop = io_log_stop,
3437 .log_sync = io_log_sync,
3438 .log_global_start = io_log_global_start,
3439 .log_global_stop = io_log_global_stop,
3440 .eventfd_add = io_eventfd_add,
3441 .eventfd_del = io_eventfd_del,
3442 .priority = 0,
3443 };
3444
3445 static void memory_map_init(void)
3446 {
3447 system_memory = g_malloc(sizeof(*system_memory));
3448 memory_region_init(system_memory, "system", INT64_MAX);
3449 set_system_memory_map(system_memory);
3450
3451 system_io = g_malloc(sizeof(*system_io));
3452 memory_region_init(system_io, "io", 65536);
3453 set_system_io_map(system_io);
3454
3455 memory_listener_register(&core_memory_listener, system_memory);
3456 memory_listener_register(&io_memory_listener, system_io);
3457 }
3458
3459 MemoryRegion *get_system_memory(void)
3460 {
3461 return system_memory;
3462 }
3463
3464 MemoryRegion *get_system_io(void)
3465 {
3466 return system_io;
3467 }
3468
3469 #endif /* !defined(CONFIG_USER_ONLY) */
3470
3471 /* physical memory access (slow version, mainly for debug) */
3472 #if defined(CONFIG_USER_ONLY)
3473 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3474 uint8_t *buf, int len, int is_write)
3475 {
3476 int l, flags;
3477 target_ulong page;
3478 void * p;
3479
3480 while (len > 0) {
3481 page = addr & TARGET_PAGE_MASK;
3482 l = (page + TARGET_PAGE_SIZE) - addr;
3483 if (l > len)
3484 l = len;
3485 flags = page_get_flags(page);
3486 if (!(flags & PAGE_VALID))
3487 return -1;
3488 if (is_write) {
3489 if (!(flags & PAGE_WRITE))
3490 return -1;
3491 /* XXX: this code should not depend on lock_user */
3492 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3493 return -1;
3494 memcpy(p, buf, l);
3495 unlock_user(p, addr, l);
3496 } else {
3497 if (!(flags & PAGE_READ))
3498 return -1;
3499 /* XXX: this code should not depend on lock_user */
3500 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3501 return -1;
3502 memcpy(buf, p, l);
3503 unlock_user(p, addr, 0);
3504 }
3505 len -= l;
3506 buf += l;
3507 addr += l;
3508 }
3509 return 0;
3510 }
3511
3512 #else
3513 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3514 int len, int is_write)
3515 {
3516 int l;
3517 uint8_t *ptr;
3518 uint32_t val;
3519 target_phys_addr_t page;
3520 MemoryRegionSection *section;
3521
3522 while (len > 0) {
3523 page = addr & TARGET_PAGE_MASK;
3524 l = (page + TARGET_PAGE_SIZE) - addr;
3525 if (l > len)
3526 l = len;
3527 section = phys_page_find(page >> TARGET_PAGE_BITS);
3528
3529 if (is_write) {
3530 if (!memory_region_is_ram(section->mr)) {
3531 target_phys_addr_t addr1;
3532 addr1 = memory_region_section_addr(section, addr);
3533 /* XXX: could force cpu_single_env to NULL to avoid
3534 potential bugs */
3535 if (l >= 4 && ((addr1 & 3) == 0)) {
3536 /* 32 bit write access */
3537 val = ldl_p(buf);
3538 io_mem_write(section->mr, addr1, val, 4);
3539 l = 4;
3540 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3541 /* 16 bit write access */
3542 val = lduw_p(buf);
3543 io_mem_write(section->mr, addr1, val, 2);
3544 l = 2;
3545 } else {
3546 /* 8 bit write access */
3547 val = ldub_p(buf);
3548 io_mem_write(section->mr, addr1, val, 1);
3549 l = 1;
3550 }
3551 } else if (!section->readonly) {
3552 ram_addr_t addr1;
3553 addr1 = memory_region_get_ram_addr(section->mr)
3554 + memory_region_section_addr(section, addr);
3555 /* RAM case */
3556 ptr = qemu_get_ram_ptr(addr1);
3557 memcpy(ptr, buf, l);
3558 if (!cpu_physical_memory_is_dirty(addr1)) {
3559 /* invalidate code */
3560 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3561 /* set dirty bit */
3562 cpu_physical_memory_set_dirty_flags(
3563 addr1, (0xff & ~CODE_DIRTY_FLAG));
3564 }
3565 qemu_put_ram_ptr(ptr);
3566 }
3567 } else {
3568 if (!(memory_region_is_ram(section->mr) ||
3569 memory_region_is_romd(section->mr))) {
3570 target_phys_addr_t addr1;
3571 /* I/O case */
3572 addr1 = memory_region_section_addr(section, addr);
3573 if (l >= 4 && ((addr1 & 3) == 0)) {
3574 /* 32 bit read access */
3575 val = io_mem_read(section->mr, addr1, 4);
3576 stl_p(buf, val);
3577 l = 4;
3578 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3579 /* 16 bit read access */
3580 val = io_mem_read(section->mr, addr1, 2);
3581 stw_p(buf, val);
3582 l = 2;
3583 } else {
3584 /* 8 bit read access */
3585 val = io_mem_read(section->mr, addr1, 1);
3586 stb_p(buf, val);
3587 l = 1;
3588 }
3589 } else {
3590 /* RAM case */
3591 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3592 + memory_region_section_addr(section,
3593 addr));
3594 memcpy(buf, ptr, l);
3595 qemu_put_ram_ptr(ptr);
3596 }
3597 }
3598 len -= l;
3599 buf += l;
3600 addr += l;
3601 }
3602 }
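
/* A minimal usage sketch for the slow-path accessor above; the guest
   physical address is made up for illustration. */
#if 0
uint8_t buf[64];
cpu_physical_memory_read(0x100000, buf, sizeof(buf));   /* guest -> host */
buf[0] ^= 1;
cpu_physical_memory_write(0x100000, buf, sizeof(buf));  /* host -> guest */
#endif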
3603
3604 /* used for ROM loading : can write in RAM and ROM */
3605 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3606 const uint8_t *buf, int len)
3607 {
3608 int l;
3609 uint8_t *ptr;
3610 target_phys_addr_t page;
3611 MemoryRegionSection *section;
3612
3613 while (len > 0) {
3614 page = addr & TARGET_PAGE_MASK;
3615 l = (page + TARGET_PAGE_SIZE) - addr;
3616 if (l > len)
3617 l = len;
3618 section = phys_page_find(page >> TARGET_PAGE_BITS);
3619
3620 if (!(memory_region_is_ram(section->mr) ||
3621 memory_region_is_romd(section->mr))) {
3622 /* do nothing */
3623 } else {
3624 unsigned long addr1;
3625 addr1 = memory_region_get_ram_addr(section->mr)
3626 + memory_region_section_addr(section, addr);
3627 /* ROM/RAM case */
3628 ptr = qemu_get_ram_ptr(addr1);
3629 memcpy(ptr, buf, l);
3630 qemu_put_ram_ptr(ptr);
3631 }
3632 len -= l;
3633 buf += l;
3634 addr += l;
3635 }
3636 }
3637
3638 typedef struct {
3639 void *buffer;
3640 target_phys_addr_t addr;
3641 target_phys_addr_t len;
3642 } BounceBuffer;
3643
3644 static BounceBuffer bounce;
3645
3646 typedef struct MapClient {
3647 void *opaque;
3648 void (*callback)(void *opaque);
3649 QLIST_ENTRY(MapClient) link;
3650 } MapClient;
3651
3652 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3653 = QLIST_HEAD_INITIALIZER(map_client_list);
3654
3655 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3656 {
3657 MapClient *client = g_malloc(sizeof(*client));
3658
3659 client->opaque = opaque;
3660 client->callback = callback;
3661 QLIST_INSERT_HEAD(&map_client_list, client, link);
3662 return client;
3663 }
3664
3665 void cpu_unregister_map_client(void *_client)
3666 {
3667 MapClient *client = (MapClient *)_client;
3668
3669 QLIST_REMOVE(client, link);
3670 g_free(client);
3671 }
3672
3673 static void cpu_notify_map_clients(void)
3674 {
3675 MapClient *client;
3676
3677 while (!QLIST_EMPTY(&map_client_list)) {
3678 client = QLIST_FIRST(&map_client_list);
3679 client->callback(client->opaque);
3680 cpu_unregister_map_client(client);
3681 }
3682 }
3683
3684 /* Map a physical memory region into a host virtual address.
3685 * May map a subset of the requested range, given by and returned in *plen.
3686 * May return NULL if resources needed to perform the mapping are exhausted.
3687 * Use only for reads OR writes - not for read-modify-write operations.
3688 * Use cpu_register_map_client() to know when retrying the map operation is
3689 * likely to succeed.
3690 */
3691 void *cpu_physical_memory_map(target_phys_addr_t addr,
3692 target_phys_addr_t *plen,
3693 int is_write)
3694 {
3695 target_phys_addr_t len = *plen;
3696 target_phys_addr_t todo = 0;
3697 int l;
3698 target_phys_addr_t page;
3699 MemoryRegionSection *section;
3700 ram_addr_t raddr = RAM_ADDR_MAX;
3701 ram_addr_t rlen;
3702 void *ret;
3703
3704 while (len > 0) {
3705 page = addr & TARGET_PAGE_MASK;
3706 l = (page + TARGET_PAGE_SIZE) - addr;
3707 if (l > len)
3708 l = len;
3709 section = phys_page_find(page >> TARGET_PAGE_BITS);
3710
3711 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3712 if (todo || bounce.buffer) {
3713 break;
3714 }
3715 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3716 bounce.addr = addr;
3717 bounce.len = l;
3718 if (!is_write) {
3719 cpu_physical_memory_read(addr, bounce.buffer, l);
3720 }
3721
3722 *plen = l;
3723 return bounce.buffer;
3724 }
3725 if (!todo) {
3726 raddr = memory_region_get_ram_addr(section->mr)
3727 + memory_region_section_addr(section, addr);
3728 }
3729
3730 len -= l;
3731 addr += l;
3732 todo += l;
3733 }
3734 rlen = todo;
3735 ret = qemu_ram_ptr_length(raddr, &rlen);
3736 *plen = rlen;
3737 return ret;
3738 }
3739
3740 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3741 * Will also mark the memory as dirty if is_write == 1. access_len gives
3742 * the amount of memory that was actually read or written by the caller.
3743 */
3744 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3745 int is_write, target_phys_addr_t access_len)
3746 {
3747 if (buffer != bounce.buffer) {
3748 if (is_write) {
3749 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3750 while (access_len) {
3751 unsigned l;
3752 l = TARGET_PAGE_SIZE;
3753 if (l > access_len)
3754 l = access_len;
3755 if (!cpu_physical_memory_is_dirty(addr1)) {
3756 /* invalidate code */
3757 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3758 /* set dirty bit */
3759 cpu_physical_memory_set_dirty_flags(
3760 addr1, (0xff & ~CODE_DIRTY_FLAG));
3761 }
3762 addr1 += l;
3763 access_len -= l;
3764 }
3765 }
3766 if (xen_enabled()) {
3767 xen_invalidate_map_cache_entry(buffer);
3768 }
3769 return;
3770 }
3771 if (is_write) {
3772 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3773 }
3774 qemu_vfree(bounce.buffer);
3775 bounce.buffer = NULL;
3776 cpu_notify_map_clients();
3777 }
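
/* A minimal sketch of the map/modify/unmap pattern described above, as
   a DMA-capable device model might use it.  'dma_addr', the length and
   the retry callback are illustrative. */
#if 0
target_phys_addr_t dma_len = 4096;
void *host = cpu_physical_memory_map(dma_addr, &dma_len, 1 /* write */);
if (host) {
    memset(host, 0, dma_len);               /* touch guest memory */
    cpu_physical_memory_unmap(host, dma_len, 1, dma_len);
} else {
    /* Mapping resources are exhausted: ask to be notified and retry. */
    cpu_register_map_client(opaque, retry_dma_cb);
}
#endif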
3778
3779 /* warning: addr must be aligned */
3780 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
3781 enum device_endian endian)
3782 {
3783 uint8_t *ptr;
3784 uint32_t val;
3785 MemoryRegionSection *section;
3786
3787 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3788
3789 if (!(memory_region_is_ram(section->mr) ||
3790 memory_region_is_romd(section->mr))) {
3791 /* I/O case */
3792 addr = memory_region_section_addr(section, addr);
3793 val = io_mem_read(section->mr, addr, 4);
3794 #if defined(TARGET_WORDS_BIGENDIAN)
3795 if (endian == DEVICE_LITTLE_ENDIAN) {
3796 val = bswap32(val);
3797 }
3798 #else
3799 if (endian == DEVICE_BIG_ENDIAN) {
3800 val = bswap32(val);
3801 }
3802 #endif
3803 } else {
3804 /* RAM case */
3805 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3806 & TARGET_PAGE_MASK)
3807 + memory_region_section_addr(section, addr));
3808 switch (endian) {
3809 case DEVICE_LITTLE_ENDIAN:
3810 val = ldl_le_p(ptr);
3811 break;
3812 case DEVICE_BIG_ENDIAN:
3813 val = ldl_be_p(ptr);
3814 break;
3815 default:
3816 val = ldl_p(ptr);
3817 break;
3818 }
3819 }
3820 return val;
3821 }
3822
3823 uint32_t ldl_phys(target_phys_addr_t addr)
3824 {
3825 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3826 }
3827
3828 uint32_t ldl_le_phys(target_phys_addr_t addr)
3829 {
3830 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3831 }
3832
3833 uint32_t ldl_be_phys(target_phys_addr_t addr)
3834 {
3835 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3836 }
3837
3838 /* warning: addr must be aligned */
3839 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
3840 enum device_endian endian)
3841 {
3842 uint8_t *ptr;
3843 uint64_t val;
3844 MemoryRegionSection *section;
3845
3846 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3847
3848 if (!(memory_region_is_ram(section->mr) ||
3849 memory_region_is_romd(section->mr))) {
3850 /* I/O case */
3851 addr = memory_region_section_addr(section, addr);
3852
3853 /* XXX This is broken when device endian != cpu endian.
3854 Fix and add "endian" variable check */
3855 #ifdef TARGET_WORDS_BIGENDIAN
3856 val = io_mem_read(section->mr, addr, 4) << 32;
3857 val |= io_mem_read(section->mr, addr + 4, 4);
3858 #else
3859 val = io_mem_read(section->mr, addr, 4);
3860 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3861 #endif
3862 } else {
3863 /* RAM case */
3864 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3865 & TARGET_PAGE_MASK)
3866 + memory_region_section_addr(section, addr));
3867 switch (endian) {
3868 case DEVICE_LITTLE_ENDIAN:
3869 val = ldq_le_p(ptr);
3870 break;
3871 case DEVICE_BIG_ENDIAN:
3872 val = ldq_be_p(ptr);
3873 break;
3874 default:
3875 val = ldq_p(ptr);
3876 break;
3877 }
3878 }
3879 return val;
3880 }
3881
3882 uint64_t ldq_phys(target_phys_addr_t addr)
3883 {
3884 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3885 }
3886
3887 uint64_t ldq_le_phys(target_phys_addr_t addr)
3888 {
3889 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3890 }
3891
3892 uint64_t ldq_be_phys(target_phys_addr_t addr)
3893 {
3894 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3895 }
3896
3897 /* XXX: optimize */
3898 uint32_t ldub_phys(target_phys_addr_t addr)
3899 {
3900 uint8_t val;
3901 cpu_physical_memory_read(addr, &val, 1);
3902 return val;
3903 }
3904
3905 /* warning: addr must be aligned */
3906 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
3907 enum device_endian endian)
3908 {
3909 uint8_t *ptr;
3910 uint64_t val;
3911 MemoryRegionSection *section;
3912
3913 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3914
3915 if (!(memory_region_is_ram(section->mr) ||
3916 memory_region_is_romd(section->mr))) {
3917 /* I/O case */
3918 addr = memory_region_section_addr(section, addr);
3919 val = io_mem_read(section->mr, addr, 2);
3920 #if defined(TARGET_WORDS_BIGENDIAN)
3921 if (endian == DEVICE_LITTLE_ENDIAN) {
3922 val = bswap16(val);
3923 }
3924 #else
3925 if (endian == DEVICE_BIG_ENDIAN) {
3926 val = bswap16(val);
3927 }
3928 #endif
3929 } else {
3930 /* RAM case */
3931 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3932 & TARGET_PAGE_MASK)
3933 + memory_region_section_addr(section, addr));
3934 switch (endian) {
3935 case DEVICE_LITTLE_ENDIAN:
3936 val = lduw_le_p(ptr);
3937 break;
3938 case DEVICE_BIG_ENDIAN:
3939 val = lduw_be_p(ptr);
3940 break;
3941 default:
3942 val = lduw_p(ptr);
3943 break;
3944 }
3945 }
3946 return val;
3947 }
3948
3949 uint32_t lduw_phys(target_phys_addr_t addr)
3950 {
3951 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3952 }
3953
3954 uint32_t lduw_le_phys(target_phys_addr_t addr)
3955 {
3956 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3957 }
3958
3959 uint32_t lduw_be_phys(target_phys_addr_t addr)
3960 {
3961 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3962 }
3963
3964 /* warning: addr must be aligned. The ram page is not marked as dirty
3965 and the code inside is not invalidated. It is useful if the dirty
3966 bits are used to track modified PTEs */
3967 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3968 {
3969 uint8_t *ptr;
3970 MemoryRegionSection *section;
3971
3972 section = phys_page_find(addr >> TARGET_PAGE_BITS);
3973
3974 if (!memory_region_is_ram(section->mr) || section->readonly) {
3975 addr = memory_region_section_addr(section, addr);
3976 if (memory_region_is_ram(section->mr)) {
3977 section = &phys_sections[phys_section_rom];
3978 }
3979 io_mem_write(section->mr, addr, val, 4);
3980 } else {
3981 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3982 & TARGET_PAGE_MASK)
3983 + memory_region_section_addr(section, addr);
3984 ptr = qemu_get_ram_ptr(addr1);
3985 stl_p(ptr, val);
3986
3987 if (unlikely(in_migration)) {
3988 if (!cpu_physical_memory_is_dirty(addr1)) {
3989 /* invalidate code */
3990 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3991 /* set dirty bit */
3992 cpu_physical_memory_set_dirty_flags(
3993 addr1, (0xff & ~CODE_DIRTY_FLAG));
3994 }
3995 }
3996 }
3997 }
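
/* A minimal sketch of the use case described above: a target MMU
   walker setting accessed/dirty bits in a guest page-table entry
   without marking the RAM page dirty or invalidating translated code.
   'pte_addr' and the two bit masks are illustrative. */
#if 0
uint32_t pte = ldl_phys(pte_addr);
stl_phys_notdirty(pte_addr, pte | PTE_ACCESSED | PTE_DIRTY);
#endif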
3998
3999 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4000 {
4001 uint8_t *ptr;
4002 MemoryRegionSection *section;
4003
4004 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4005
4006 if (!memory_region_is_ram(section->mr) || section->readonly) {
4007 addr = memory_region_section_addr(section, addr);
4008 if (memory_region_is_ram(section->mr)) {
4009 section = &phys_sections[phys_section_rom];
4010 }
4011 #ifdef TARGET_WORDS_BIGENDIAN
4012 io_mem_write(section->mr, addr, val >> 32, 4);
4013 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
4014 #else
4015 io_mem_write(section->mr, addr, (uint32_t)val, 4);
4016 io_mem_write(section->mr, addr + 4, val >> 32, 4);
4017 #endif
4018 } else {
4019 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
4020 & TARGET_PAGE_MASK)
4021 + memory_region_section_addr(section, addr));
4022 stq_p(ptr, val);
4023 }
4024 }
4025
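/* 32-bit physical store.  I/O accesses are byte-swapped if the requested
endianness differs from the target's and forwarded to io_mem_write();
RAM accesses go through the host pointer and, unlike the _notdirty
variants above, invalidate translated code and set the dirty flags when
the page was not already dirty. */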
4026 /* warning: addr must be aligned */
4027 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4028 enum device_endian endian)
4029 {
4030 uint8_t *ptr;
4031 MemoryRegionSection *section;
4032
4033 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4034
4035 if (!memory_region_is_ram(section->mr) || section->readonly) {
4036 addr = memory_region_section_addr(section, addr);
4037 if (memory_region_is_ram(section->mr)) {
4038 section = &phys_sections[phys_section_rom];
4039 }
4040 #if defined(TARGET_WORDS_BIGENDIAN)
4041 if (endian == DEVICE_LITTLE_ENDIAN) {
4042 val = bswap32(val);
4043 }
4044 #else
4045 if (endian == DEVICE_BIG_ENDIAN) {
4046 val = bswap32(val);
4047 }
4048 #endif
4049 io_mem_write(section->mr, addr, val, 4);
4050 } else {
4051 unsigned long addr1;
4052 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4053 + memory_region_section_addr(section, addr);
4054 /* RAM case */
4055 ptr = qemu_get_ram_ptr(addr1);
4056 switch (endian) {
4057 case DEVICE_LITTLE_ENDIAN:
4058 stl_le_p(ptr, val);
4059 break;
4060 case DEVICE_BIG_ENDIAN:
4061 stl_be_p(ptr, val);
4062 break;
4063 default:
4064 stl_p(ptr, val);
4065 break;
4066 }
4067 if (!cpu_physical_memory_is_dirty(addr1)) {
4068 /* invalidate code */
4069 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4070 /* set dirty bit */
4071 cpu_physical_memory_set_dirty_flags(addr1,
4072 (0xff & ~CODE_DIRTY_FLAG));
4073 }
4074 }
4075 }
4076
4077 void stl_phys(target_phys_addr_t addr, uint32_t val)
4078 {
4079 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4080 }
4081
4082 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4083 {
4084 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4085 }
4086
4087 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4088 {
4089 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4090 }
4091
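/* Illustrative use only (reg_gpa and val below are hypothetical, not part
 * of this file): a device model that keeps a 32-bit little-endian register
 * at guest-physical address reg_gpa could update and re-read it with
 *
 *     stl_le_phys(reg_gpa, val);
 *     val = ldl_le_phys(reg_gpa);
 *
 * regardless of the host's or the target's native byte order.
 */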
4092 /* XXX: optimize */
4093 void stb_phys(target_phys_addr_t addr, uint32_t val)
4094 {
4095 uint8_t v = val;
4096 cpu_physical_memory_write(addr, &v, 1);
4097 }
4098
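/* 16-bit physical store; mirrors stl_phys_internal() above with 2-byte
accesses and bswap16() instead of bswap32(). */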
4099 /* warning: addr must be aligned */
4100 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4101 enum device_endian endian)
4102 {
4103 uint8_t *ptr;
4104 MemoryRegionSection *section;
4105
4106 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4107
4108 if (!memory_region_is_ram(section->mr) || section->readonly) {
4109 addr = memory_region_section_addr(section, addr);
4110 if (memory_region_is_ram(section->mr)) {
4111 section = &phys_sections[phys_section_rom];
4112 }
4113 #if defined(TARGET_WORDS_BIGENDIAN)
4114 if (endian == DEVICE_LITTLE_ENDIAN) {
4115 val = bswap16(val);
4116 }
4117 #else
4118 if (endian == DEVICE_BIG_ENDIAN) {
4119 val = bswap16(val);
4120 }
4121 #endif
4122 io_mem_write(section->mr, addr, val, 2);
4123 } else {
4124 unsigned long addr1;
4125 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4126 + memory_region_section_addr(section, addr);
4127 /* RAM case */
4128 ptr = qemu_get_ram_ptr(addr1);
4129 switch (endian) {
4130 case DEVICE_LITTLE_ENDIAN:
4131 stw_le_p(ptr, val);
4132 break;
4133 case DEVICE_BIG_ENDIAN:
4134 stw_be_p(ptr, val);
4135 break;
4136 default:
4137 stw_p(ptr, val);
4138 break;
4139 }
4140 if (!cpu_physical_memory_is_dirty(addr1)) {
4141 /* invalidate code */
4142 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4143 /* set dirty bit */
4144 cpu_physical_memory_set_dirty_flags(addr1,
4145 (0xff & ~CODE_DIRTY_FLAG));
4146 }
4147 }
4148 }
4149
4150 void stw_phys(target_phys_addr_t addr, uint32_t val)
4151 {
4152 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4153 }
4154
4155 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4156 {
4157 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4158 }
4159
4160 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4161 {
4162 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4163 }
4164
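/* 64-bit stores.  The value is converted from host order to the requested
byte order up front and then written with cpu_physical_memory_write(), so
unlike the st{w,l}_phys helpers above these do not require an aligned
address. */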
4165 /* XXX: optimize */
4166 void stq_phys(target_phys_addr_t addr, uint64_t val)
4167 {
4168 val = tswap64(val);
4169 cpu_physical_memory_write(addr, &val, 8);
4170 }
4171
4172 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4173 {
4174 val = cpu_to_le64(val);
4175 cpu_physical_memory_write(addr, &val, 8);
4176 }
4177
4178 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4179 {
4180 val = cpu_to_be64(val);
4181 cpu_physical_memory_write(addr, &val, 8);
4182 }
4183
4184 /* virtual memory access for debug (includes writing to ROM) */
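/* The guest virtual range is walked page by page: each page is translated
with cpu_get_phys_page_debug() and the data is then copied through the
physical-memory helpers, using cpu_physical_memory_write_rom() for writes
so that ROM-backed pages can also be patched. */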
4185 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4186 uint8_t *buf, int len, int is_write)
4187 {
4188 int l;
4189 target_phys_addr_t phys_addr;
4190 target_ulong page;
4191
4192 while (len > 0) {
4193 page = addr & TARGET_PAGE_MASK;
4194 phys_addr = cpu_get_phys_page_debug(env, page);
4195 /* if no physical page mapped, return an error */
4196 if (phys_addr == -1)
4197 return -1;
4198 l = (page + TARGET_PAGE_SIZE) - addr;
4199 if (l > len)
4200 l = len;
4201 phys_addr += (addr & ~TARGET_PAGE_MASK);
4202 if (is_write)
4203 cpu_physical_memory_write_rom(phys_addr, buf, l);
4204 else
4205 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4206 len -= l;
4207 buf += l;
4208 addr += l;
4209 }
4210 return 0;
4211 }
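/* Illustrative use only (vaddr, buf and len are hypothetical variables): a
 * gdbstub-style write through the guest's current page tables would look
 * roughly like
 *
 *     if (cpu_memory_rw_debug(env, vaddr, buf, len, 1) < 0) {
 *         ...report the inaccessible address back to the debugger...
 *     }
 */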
4212 #endif
4213
4214 /* In deterministic execution mode, an instruction that performs device
4215 I/O must be the last instruction in the TB. */
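/* The sequence is: find the TB containing retaddr, restore the CPU state to
the point of the fault, then discard that TB and regenerate one whose
instruction count ends exactly at the I/O instruction, with CF_LAST_IO set
so the access is replayed as the final instruction of the new block. */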
4216 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4217 {
4218 TranslationBlock *tb;
4219 uint32_t n, cflags;
4220 target_ulong pc, cs_base;
4221 uint64_t flags;
4222
4223 tb = tb_find_pc(retaddr);
4224 if (!tb) {
4225 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4226 (void *)retaddr);
4227 }
4228 n = env->icount_decr.u16.low + tb->icount;
4229 cpu_restore_state(tb, env, retaddr);
4230 /* Calculate how many instructions had been executed before the fault
4231 occurred. */
4232 n = n - env->icount_decr.u16.low;
4233 /* Generate a new TB ending on the I/O insn. */
4234 n++;
4235 /* On MIPS and SH, delay slot instructions can only be restarted if
4236 they were already the first instruction in the TB. If this is not
4237 the first instruction in a TB then re-execute the preceding
4238 branch. */
4239 #if defined(TARGET_MIPS)
4240 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4241 env->active_tc.PC -= 4;
4242 env->icount_decr.u16.low++;
4243 env->hflags &= ~MIPS_HFLAG_BMASK;
4244 }
4245 #elif defined(TARGET_SH4)
4246 if ((env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) != 0
4247 && n > 1) {
4248 env->pc -= 2;
4249 env->icount_decr.u16.low++;
4250 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4251 }
4252 #endif
4253 /* This should never happen. */
4254 if (n > CF_COUNT_MASK)
4255 cpu_abort(env, "TB too big during recompile");
4256
4257 cflags = n | CF_LAST_IO;
4258 pc = tb->pc;
4259 cs_base = tb->cs_base;
4260 flags = tb->flags;
4261 tb_phys_invalidate(tb, -1);
4262 /* FIXME: In theory this could raise an exception. In practice
4263 we have already translated the block once so it's probably ok. */
4264 tb_gen_code(env, pc, cs_base, flags, cflags);
4265 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4266 the first in the TB) then we end up generating a whole new TB and
4267 repeating the fault, which is horribly inefficient.
4268 Better would be to execute just this insn uncached, or generate a
4269 second new TB. */
4270 cpu_resume_from_signal(env, NULL);
4271 }
4272
4273 #if !defined(CONFIG_USER_ONLY)
4274
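/* Prints translation-buffer statistics (code buffer usage, TB counts and
sizes, cross-page and direct-jump ratios, flush counts) followed by the TCG
profiling information; this backs the monitor's "info jit" output. */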
4275 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4276 {
4277 int i, target_code_size, max_target_code_size;
4278 int direct_jmp_count, direct_jmp2_count, cross_page;
4279 TranslationBlock *tb;
4280
4281 target_code_size = 0;
4282 max_target_code_size = 0;
4283 cross_page = 0;
4284 direct_jmp_count = 0;
4285 direct_jmp2_count = 0;
4286 for(i = 0; i < nb_tbs; i++) {
4287 tb = &tbs[i];
4288 target_code_size += tb->size;
4289 if (tb->size > max_target_code_size)
4290 max_target_code_size = tb->size;
4291 if (tb->page_addr[1] != -1)
4292 cross_page++;
4293 if (tb->tb_next_offset[0] != 0xffff) {
4294 direct_jmp_count++;
4295 if (tb->tb_next_offset[1] != 0xffff) {
4296 direct_jmp2_count++;
4297 }
4298 }
4299 }
4300 /* XXX: avoid using doubles? */
4301 cpu_fprintf(f, "Translation buffer state:\n");
4302 cpu_fprintf(f, "gen code size %td/%ld\n",
4303 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4304 cpu_fprintf(f, "TB count %d/%d\n",
4305 nb_tbs, code_gen_max_blocks);
4306 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4307 nb_tbs ? target_code_size / nb_tbs : 0,
4308 max_target_code_size);
4309 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4310 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4311 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4312 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4313 cross_page,
4314 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4315 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4316 direct_jmp_count,
4317 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4318 direct_jmp2_count,
4319 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4320 cpu_fprintf(f, "\nStatistics:\n");
4321 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4322 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4323 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4324 tcg_dump_info(f, cpu_fprintf);
4325 }
4326
4327 /*
4328 * A helper function for the _utterly broken_ virtio device model to find
4329 * out if it's running on a big-endian machine. Don't do this at home, kids!
4330 */
4331 bool virtio_is_big_endian(void);
4332 bool virtio_is_big_endian(void)
4333 {
4334 #if defined(TARGET_WORDS_BIGENDIAN)
4335 return true;
4336 #else
4337 return false;
4338 #endif
4339 }
4340
4341 #endif /* !CONFIG_USER_ONLY */
4342
4343 #ifndef CONFIG_USER_ONLY
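/* Returns true if the given physical address is backed by an I/O (or
unassigned) region rather than by RAM or a ROM device, i.e. if it cannot be
accessed through a direct host pointer. */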
4344 bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
4345 {
4346 MemoryRegionSection *section;
4347
4348 section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);
4349
4350 return !(memory_region_is_ram(section->mr) ||
4351 memory_region_is_romd(section->mr));
4352 }
4353 #endif /* !CONFIG_USER_ONLY */