1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
59
60 #include "cputlb.h"
61
62 #include "memory-internal.h"
63
64 //#define DEBUG_TB_INVALIDATE
65 //#define DEBUG_FLUSH
66 //#define DEBUG_UNASSIGNED
67
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
73
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
78
79 #define SMC_BITMAP_USE_THRESHOLD 10
80
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
87
88 uint8_t *code_gen_prologue;
89 static uint8_t *code_gen_buffer;
90 static size_t code_gen_buffer_size;
91 /* threshold to flush the translated code buffer */
92 static size_t code_gen_buffer_max_size;
93 static uint8_t *code_gen_ptr;
94
95 #if !defined(CONFIG_USER_ONLY)
96 int phys_ram_fd;
97 static int in_migration;
98
99 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
100
101 static MemoryRegion *system_memory;
102 static MemoryRegion *system_io;
103
104 AddressSpace address_space_io;
105 AddressSpace address_space_memory;
106
107 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
108 static MemoryRegion io_mem_subpage_ram;
109
110 #endif
111
112 CPUArchState *first_cpu;
113 /* current CPU in the current thread. It is only valid inside
114 cpu_exec() */
115 DEFINE_TLS(CPUArchState *,cpu_single_env);
116 /* 0 = Do not count executed instructions.
117 1 = Precise instruction counting.
118 2 = Adaptive rate instruction counting. */
119 int use_icount = 0;
120
121 typedef struct PageDesc {
122 /* list of TBs intersecting this ram page */
123 TranslationBlock *first_tb;
124 /* in order to optimize self-modifying code, we count the number
125 of code write accesses to a given page before building a bitmap */
126 unsigned int code_write_count;
127 uint8_t *code_bitmap;
128 #if defined(CONFIG_USER_ONLY)
129 unsigned long flags;
130 #endif
131 } PageDesc;
132
133 /* In system mode we want L1_MAP to be based on ram offsets,
134 while in user mode we want it to be based on virtual addresses. */
135 #if !defined(CONFIG_USER_ONLY)
136 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
137 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
138 #else
139 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
140 #endif
141 #else
142 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
143 #endif
144
145 /* Size of the L2 (and L3, etc) page tables. */
146 #define L2_BITS 10
147 #define L2_SIZE (1 << L2_BITS)
148
149 #define P_L2_LEVELS \
150 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
151
152 /* The bits remaining after N lower levels of page tables. */
153 #define V_L1_BITS_REM \
154 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
155
156 #if V_L1_BITS_REM < 4
157 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
158 #else
159 #define V_L1_BITS V_L1_BITS_REM
160 #endif
161
162 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
163
164 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
165
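/* A worked example of the arithmetic above, assuming (purely for
   illustration) a user-mode target with TARGET_VIRT_ADDR_SPACE_BITS == 32
   and TARGET_PAGE_BITS == 12: 32 - 12 = 20 bits of page index remain;
   20 % 10 == 0, so V_L1_BITS_REM is 0 (< 4) and V_L1_BITS becomes 10.
   V_L1_SIZE is then 1024 entries and V_L1_SHIFT is 32 - 12 - 10 = 10,
   i.e. bits [19:10] of the page index select the L1 entry and bits [9:0]
   index the single L2 level below it.  Likewise, with
   TARGET_PHYS_ADDR_SPACE_BITS == 36 the physical map would use
   P_L2_LEVELS == ((36 - 12 - 1) / 10) + 1 == 3. */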
166 uintptr_t qemu_real_host_page_size;
167 uintptr_t qemu_host_page_size;
168 uintptr_t qemu_host_page_mask;
169
170 /* This is a multi-level map on the virtual address space.
171 The bottom level has pointers to PageDesc. */
172 static void *l1_map[V_L1_SIZE];
173
174 #if !defined(CONFIG_USER_ONLY)
175
176 static MemoryRegionSection *phys_sections;
177 static unsigned phys_sections_nb, phys_sections_nb_alloc;
178 static uint16_t phys_section_unassigned;
179 static uint16_t phys_section_notdirty;
180 static uint16_t phys_section_rom;
181 static uint16_t phys_section_watch;
182
183 /* Simple allocator for PhysPageEntry nodes */
184 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
185 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
186
187 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
188
189 static void io_mem_init(void);
190 static void memory_map_init(void);
191
192 static MemoryRegion io_mem_watch;
193 #endif
194
195 /* statistics */
196 static int tb_flush_count;
197 static int tb_phys_invalidate_count;
198
199 #ifdef _WIN32
200 static inline void map_exec(void *addr, long size)
201 {
202 DWORD old_protect;
203 VirtualProtect(addr, size,
204 PAGE_EXECUTE_READWRITE, &old_protect);
205
206 }
207 #else
208 static inline void map_exec(void *addr, long size)
209 {
210 unsigned long start, end, page_size;
211
212 page_size = getpagesize();
213 start = (unsigned long)addr;
214 start &= ~(page_size - 1);
215
216 end = (unsigned long)addr + size;
217 end += page_size - 1;
218 end &= ~(page_size - 1);
219
220 mprotect((void *)start, end - start,
221 PROT_READ | PROT_WRITE | PROT_EXEC);
222 }
223 #endif
224
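/* For illustration, a hypothetical map_exec((void *)0x40001234, 0x100) call
   with 4 KiB host pages rounds start down to 0x40001000 and end up to
   0x40002000, so exactly one 0x1000-byte page is mprotect()ed RWX. */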
225 static void page_init(void)
226 {
227 /* NOTE: we can always suppose that qemu_host_page_size >=
228 TARGET_PAGE_SIZE */
229 #ifdef _WIN32
230 {
231 SYSTEM_INFO system_info;
232
233 GetSystemInfo(&system_info);
234 qemu_real_host_page_size = system_info.dwPageSize;
235 }
236 #else
237 qemu_real_host_page_size = getpagesize();
238 #endif
239 if (qemu_host_page_size == 0)
240 qemu_host_page_size = qemu_real_host_page_size;
241 if (qemu_host_page_size < TARGET_PAGE_SIZE)
242 qemu_host_page_size = TARGET_PAGE_SIZE;
243 qemu_host_page_mask = ~(qemu_host_page_size - 1);
244
245 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
246 {
247 #ifdef HAVE_KINFO_GETVMMAP
248 struct kinfo_vmentry *freep;
249 int i, cnt;
250
251 freep = kinfo_getvmmap(getpid(), &cnt);
252 if (freep) {
253 mmap_lock();
254 for (i = 0; i < cnt; i++) {
255 unsigned long startaddr, endaddr;
256
257 startaddr = freep[i].kve_start;
258 endaddr = freep[i].kve_end;
259 if (h2g_valid(startaddr)) {
260 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
261
262 if (h2g_valid(endaddr)) {
263 endaddr = h2g(endaddr);
264 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
265 } else {
266 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
267 endaddr = ~0ul;
268 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
269 #endif
270 }
271 }
272 }
273 free(freep);
274 mmap_unlock();
275 }
276 #else
277 FILE *f;
278
279 last_brk = (unsigned long)sbrk(0);
280
281 f = fopen("/compat/linux/proc/self/maps", "r");
282 if (f) {
283 mmap_lock();
284
285 do {
286 unsigned long startaddr, endaddr;
287 int n;
288
289 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
290
291 if (n == 2 && h2g_valid(startaddr)) {
292 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
293
294 if (h2g_valid(endaddr)) {
295 endaddr = h2g(endaddr);
296 } else {
297 endaddr = ~0ul;
298 }
299 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
300 }
301 } while (!feof(f));
302
303 fclose(f);
304 mmap_unlock();
305 }
306 #endif
307 }
308 #endif
309 }
310
311 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
312 {
313 PageDesc *pd;
314 void **lp;
315 int i;
316
317 #if defined(CONFIG_USER_ONLY)
318 /* We can't use g_malloc because it may recurse into a locked mutex. */
319 # define ALLOC(P, SIZE) \
320 do { \
321 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
322 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
323 } while (0)
324 #else
325 # define ALLOC(P, SIZE) \
326 do { P = g_malloc0(SIZE); } while (0)
327 #endif
328
329 /* Level 1. Always allocated. */
330 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
331
332 /* Level 2..N-1. */
333 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
334 void **p = *lp;
335
336 if (p == NULL) {
337 if (!alloc) {
338 return NULL;
339 }
340 ALLOC(p, sizeof(void *) * L2_SIZE);
341 *lp = p;
342 }
343
344 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
345 }
346
347 pd = *lp;
348 if (pd == NULL) {
349 if (!alloc) {
350 return NULL;
351 }
352 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
353 *lp = pd;
354 }
355
356 #undef ALLOC
357
358 return pd + (index & (L2_SIZE - 1));
359 }
360
361 static inline PageDesc *page_find(tb_page_addr_t index)
362 {
363 return page_find_alloc(index, 0);
364 }
365
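/* Minimal usage sketch, mirroring the callers later in this file (e.g.
   tb_invalidate_phys_page_range()): look up the descriptor for a page and
   bail out if it was never allocated:

       PageDesc *p = page_find(addr >> TARGET_PAGE_BITS);
       if (!p) {
           return;
       }

   Only page_find_alloc(..., 1) creates missing intermediate levels. */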
366 #if !defined(CONFIG_USER_ONLY)
367
368 static void phys_map_node_reserve(unsigned nodes)
369 {
370 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
371 typedef PhysPageEntry Node[L2_SIZE];
372 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
373 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
374 phys_map_nodes_nb + nodes);
375 phys_map_nodes = g_renew(Node, phys_map_nodes,
376 phys_map_nodes_nb_alloc);
377 }
378 }
379
380 static uint16_t phys_map_node_alloc(void)
381 {
382 unsigned i;
383 uint16_t ret;
384
385 ret = phys_map_nodes_nb++;
386 assert(ret != PHYS_MAP_NODE_NIL);
387 assert(ret != phys_map_nodes_nb_alloc);
388 for (i = 0; i < L2_SIZE; ++i) {
389 phys_map_nodes[ret][i].is_leaf = 0;
390 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
391 }
392 return ret;
393 }
394
395 static void phys_map_nodes_reset(void)
396 {
397 phys_map_nodes_nb = 0;
398 }
399
400
401 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
402 hwaddr *nb, uint16_t leaf,
403 int level)
404 {
405 PhysPageEntry *p;
406 int i;
407 hwaddr step = (hwaddr)1 << (level * L2_BITS);
408
409 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
410 lp->ptr = phys_map_node_alloc();
411 p = phys_map_nodes[lp->ptr];
412 if (level == 0) {
413 for (i = 0; i < L2_SIZE; i++) {
414 p[i].is_leaf = 1;
415 p[i].ptr = phys_section_unassigned;
416 }
417 }
418 } else {
419 p = phys_map_nodes[lp->ptr];
420 }
421 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
422
423 while (*nb && lp < &p[L2_SIZE]) {
424 if ((*index & (step - 1)) == 0 && *nb >= step) {
425 lp->is_leaf = true;
426 lp->ptr = leaf;
427 *index += step;
428 *nb -= step;
429 } else {
430 phys_page_set_level(lp, index, nb, leaf, level - 1);
431 }
432 ++lp;
433 }
434 }
435
436 static void phys_page_set(AddressSpaceDispatch *d,
437 hwaddr index, hwaddr nb,
438 uint16_t leaf)
439 {
440 /* Wildly overreserve - it doesn't matter much. */
441 phys_map_node_reserve(3 * P_L2_LEVELS);
442
443 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
444 }
445
446 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
447 {
448 PhysPageEntry lp = d->phys_map;
449 PhysPageEntry *p;
450 int i;
451 uint16_t s_index = phys_section_unassigned;
452
453 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
454 if (lp.ptr == PHYS_MAP_NODE_NIL) {
455 goto not_found;
456 }
457 p = phys_map_nodes[lp.ptr];
458 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
459 }
460
461 s_index = lp.ptr;
462 not_found:
463 return &phys_sections[s_index];
464 }
465
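/* Usage sketch, as in tb_invalidate_phys_addr() below: callers pass a page
   index (addr >> TARGET_PAGE_BITS) and always get a section back; when no
   leaf was registered for that range the result falls through to
   &phys_sections[phys_section_unassigned]:

       MemoryRegionSection *section =
           phys_page_find(address_space_memory.dispatch,
                          addr >> TARGET_PAGE_BITS);
*/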
466 bool memory_region_is_unassigned(MemoryRegion *mr)
467 {
468 return mr != &io_mem_ram && mr != &io_mem_rom
469 && mr != &io_mem_notdirty && !mr->rom_device
470 && mr != &io_mem_watch;
471 }
472
473 #define mmap_lock() do { } while(0)
474 #define mmap_unlock() do { } while(0)
475 #endif
476
477 #if defined(CONFIG_USER_ONLY)
478 /* Currently it is not recommended to allocate big chunks of data in
479 user mode. It will change when a dedicated libc is used. */
480 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
481 region in which the guest needs to run. Revisit this. */
482 #define USE_STATIC_CODE_GEN_BUFFER
483 #endif
484
485 /* ??? Should configure for this, not list operating systems here. */
486 #if (defined(__linux__) \
487 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
488 || defined(__DragonFly__) || defined(__OpenBSD__) \
489 || defined(__NetBSD__))
490 # define USE_MMAP
491 #endif
492
493 /* Minimum size of the code gen buffer. This number is arbitrarily chosen,
494 but not so small that we can't have a fair number of TBs live. */
495 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
496
497 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
498 indicated, this is constrained by the range of direct branches on the
499 host cpu, as used by the TCG implementation of goto_tb. */
500 #if defined(__x86_64__)
501 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
502 #elif defined(__sparc__)
503 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
504 #elif defined(__arm__)
505 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
506 #elif defined(__s390x__)
507 /* We have a +- 4GB range on the branches; leave some slop. */
508 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
509 #else
510 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
511 #endif
512
513 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
514
515 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
516 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
517 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
518
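/* Concretely: on an x86_64 or sparc host MAX_CODE_GEN_BUFFER_SIZE is 2 GiB,
   so the default stays at 32 MiB; on a 32-bit ARM host the 16 MiB direct
   branch range clamps the default down to 16 MiB. */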
519 static inline size_t size_code_gen_buffer(size_t tb_size)
520 {
521 /* Size the buffer. */
522 if (tb_size == 0) {
523 #ifdef USE_STATIC_CODE_GEN_BUFFER
524 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
525 #else
526 /* ??? Needs adjustments. */
527 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
528 static buffer, we could size this on RESERVED_VA, on the text
529 segment size of the executable, or continue to use the default. */
530 tb_size = (unsigned long)(ram_size / 4);
531 #endif
532 }
533 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
534 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
535 }
536 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
537 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
538 }
539 code_gen_buffer_size = tb_size;
540 return tb_size;
541 }
542
543 #ifdef USE_STATIC_CODE_GEN_BUFFER
544 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
545 __attribute__((aligned(CODE_GEN_ALIGN)));
546
547 static inline void *alloc_code_gen_buffer(void)
548 {
549 map_exec(static_code_gen_buffer, code_gen_buffer_size);
550 return static_code_gen_buffer;
551 }
552 #elif defined(USE_MMAP)
553 static inline void *alloc_code_gen_buffer(void)
554 {
555 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
556 uintptr_t start = 0;
557 void *buf;
558
559 /* Constrain the position of the buffer based on the host cpu.
560 Note that these addresses are chosen in concert with the
561 addresses assigned in the relevant linker script file. */
562 # if defined(__PIE__) || defined(__PIC__)
563 /* Don't bother setting a preferred location if we're building
564 a position-independent executable. We're more likely to get
565 an address near the main executable if we let the kernel
566 choose the address. */
567 # elif defined(__x86_64__) && defined(MAP_32BIT)
568 /* Force the memory down into low memory with the executable.
569 Leave the choice of exact location with the kernel. */
570 flags |= MAP_32BIT;
571 /* Cannot expect to map more than 800MB in low memory. */
572 if (code_gen_buffer_size > 800u * 1024 * 1024) {
573 code_gen_buffer_size = 800u * 1024 * 1024;
574 }
575 # elif defined(__sparc__)
576 start = 0x40000000ul;
577 # elif defined(__s390x__)
578 start = 0x90000000ul;
579 # endif
580
581 buf = mmap((void *)start, code_gen_buffer_size,
582 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
583 return buf == MAP_FAILED ? NULL : buf;
584 }
585 #else
586 static inline void *alloc_code_gen_buffer(void)
587 {
588 void *buf = g_malloc(code_gen_buffer_size);
589 if (buf) {
590 map_exec(buf, code_gen_buffer_size);
591 }
592 return buf;
593 }
594 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
595
596 static inline void code_gen_alloc(size_t tb_size)
597 {
598 code_gen_buffer_size = size_code_gen_buffer(tb_size);
599 code_gen_buffer = alloc_code_gen_buffer();
600 if (code_gen_buffer == NULL) {
601 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
602 exit(1);
603 }
604
605 /* Steal room for the prologue at the end of the buffer. This ensures
606 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
607 from TB's to the prologue are going to be in range. It also means
608 that we don't need to mark (additional) portions of the data segment
609 as executable. */
610 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
611 code_gen_buffer_size -= 1024;
612
613 code_gen_buffer_max_size = code_gen_buffer_size -
614 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
615 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
616 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
617 }
618
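/* Resulting layout of the allocation (descriptive only): translated TBs grow
   upward from code_gen_buffer; code_gen_buffer_max_size stops allocation
   TCG_MAX_OP_SIZE * OPC_BUF_SIZE bytes early so the TB currently being
   generated always fits before tb_flush() is forced; the final 1024 bytes
   hold the prologue pointed to by code_gen_prologue. */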
619 /* Must be called before using the QEMU cpus. 'tb_size' is the size
620 (in bytes) allocated to the translation buffer. Zero means default
621 size. */
622 void tcg_exec_init(unsigned long tb_size)
623 {
624 cpu_gen_init();
625 code_gen_alloc(tb_size);
626 code_gen_ptr = code_gen_buffer;
627 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
628 page_init();
629 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
630 /* There's no guest base to take into account, so go ahead and
631 initialize the prologue now. */
632 tcg_prologue_init(&tcg_ctx);
633 #endif
634 }
635
636 bool tcg_enabled(void)
637 {
638 return code_gen_buffer != NULL;
639 }
640
641 void cpu_exec_init_all(void)
642 {
643 #if !defined(CONFIG_USER_ONLY)
644 memory_map_init();
645 io_mem_init();
646 #endif
647 }
648
649 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
650
651 static int cpu_common_post_load(void *opaque, int version_id)
652 {
653 CPUArchState *env = opaque;
654
655 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
656 version_id is increased. */
657 env->interrupt_request &= ~0x01;
658 tlb_flush(env, 1);
659
660 return 0;
661 }
662
663 static const VMStateDescription vmstate_cpu_common = {
664 .name = "cpu_common",
665 .version_id = 1,
666 .minimum_version_id = 1,
667 .minimum_version_id_old = 1,
668 .post_load = cpu_common_post_load,
669 .fields = (VMStateField []) {
670 VMSTATE_UINT32(halted, CPUArchState),
671 VMSTATE_UINT32(interrupt_request, CPUArchState),
672 VMSTATE_END_OF_LIST()
673 }
674 };
675 #endif
676
677 CPUArchState *qemu_get_cpu(int cpu)
678 {
679 CPUArchState *env = first_cpu;
680
681 while (env) {
682 if (env->cpu_index == cpu)
683 break;
684 env = env->next_cpu;
685 }
686
687 return env;
688 }
689
690 void cpu_exec_init(CPUArchState *env)
691 {
692 CPUArchState **penv;
693 int cpu_index;
694
695 #if defined(CONFIG_USER_ONLY)
696 cpu_list_lock();
697 #endif
698 env->next_cpu = NULL;
699 penv = &first_cpu;
700 cpu_index = 0;
701 while (*penv != NULL) {
702 penv = &(*penv)->next_cpu;
703 cpu_index++;
704 }
705 env->cpu_index = cpu_index;
706 env->numa_node = 0;
707 QTAILQ_INIT(&env->breakpoints);
708 QTAILQ_INIT(&env->watchpoints);
709 #ifndef CONFIG_USER_ONLY
710 env->thread_id = qemu_get_thread_id();
711 #endif
712 *penv = env;
713 #if defined(CONFIG_USER_ONLY)
714 cpu_list_unlock();
715 #endif
716 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
717 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
718 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
719 cpu_save, cpu_load, env);
720 #endif
721 }
722
723 /* Allocate a new translation block. Flush the translation buffer if
724 too many translation blocks or too much generated code. */
725 static TranslationBlock *tb_alloc(target_ulong pc)
726 {
727 TranslationBlock *tb;
728
729 if (nb_tbs >= code_gen_max_blocks ||
730 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
731 return NULL;
732 tb = &tbs[nb_tbs++];
733 tb->pc = pc;
734 tb->cflags = 0;
735 return tb;
736 }
737
738 void tb_free(TranslationBlock *tb)
739 {
740 /* In practice this is mostly used for single-use temporary TBs.
741 Ignore the hard cases and just back up if this TB happens to
742 be the last one generated. */
743 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
744 code_gen_ptr = tb->tc_ptr;
745 nb_tbs--;
746 }
747 }
748
749 static inline void invalidate_page_bitmap(PageDesc *p)
750 {
751 if (p->code_bitmap) {
752 g_free(p->code_bitmap);
753 p->code_bitmap = NULL;
754 }
755 p->code_write_count = 0;
756 }
757
758 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
759
760 static void page_flush_tb_1 (int level, void **lp)
761 {
762 int i;
763
764 if (*lp == NULL) {
765 return;
766 }
767 if (level == 0) {
768 PageDesc *pd = *lp;
769 for (i = 0; i < L2_SIZE; ++i) {
770 pd[i].first_tb = NULL;
771 invalidate_page_bitmap(pd + i);
772 }
773 } else {
774 void **pp = *lp;
775 for (i = 0; i < L2_SIZE; ++i) {
776 page_flush_tb_1 (level - 1, pp + i);
777 }
778 }
779 }
780
781 static void page_flush_tb(void)
782 {
783 int i;
784 for (i = 0; i < V_L1_SIZE; i++) {
785 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
786 }
787 }
788
789 /* flush all the translation blocks */
790 /* XXX: tb_flush is currently not thread safe */
791 void tb_flush(CPUArchState *env1)
792 {
793 CPUArchState *env;
794 #if defined(DEBUG_FLUSH)
795 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
796 (unsigned long)(code_gen_ptr - code_gen_buffer),
797 nb_tbs, nb_tbs > 0 ?
798 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
799 #endif
800 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
801 cpu_abort(env1, "Internal error: code buffer overflow\n");
802
803 nb_tbs = 0;
804
805 for(env = first_cpu; env != NULL; env = env->next_cpu) {
806 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
807 }
808
809 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
810 page_flush_tb();
811
812 code_gen_ptr = code_gen_buffer;
813 /* XXX: flush processor icache at this point if cache flush is
814 expensive */
815 tb_flush_count++;
816 }
817
818 #ifdef DEBUG_TB_CHECK
819
820 static void tb_invalidate_check(target_ulong address)
821 {
822 TranslationBlock *tb;
823 int i;
824 address &= TARGET_PAGE_MASK;
825 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
826 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
827 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
828 address >= tb->pc + tb->size)) {
829 printf("ERROR invalidate: address=" TARGET_FMT_lx
830 " PC=%08lx size=%04x\n",
831 address, (long)tb->pc, tb->size);
832 }
833 }
834 }
835 }
836
837 /* verify that all the pages have correct rights for code */
838 static void tb_page_check(void)
839 {
840 TranslationBlock *tb;
841 int i, flags1, flags2;
842
843 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
844 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
845 flags1 = page_get_flags(tb->pc);
846 flags2 = page_get_flags(tb->pc + tb->size - 1);
847 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
848 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
849 (long)tb->pc, tb->size, flags1, flags2);
850 }
851 }
852 }
853 }
854
855 #endif
856
857 /* invalidate one TB */
858 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
859 int next_offset)
860 {
861 TranslationBlock *tb1;
862 for(;;) {
863 tb1 = *ptb;
864 if (tb1 == tb) {
865 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
866 break;
867 }
868 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
869 }
870 }
871
872 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
873 {
874 TranslationBlock *tb1;
875 unsigned int n1;
876
877 for(;;) {
878 tb1 = *ptb;
879 n1 = (uintptr_t)tb1 & 3;
880 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
881 if (tb1 == tb) {
882 *ptb = tb1->page_next[n1];
883 break;
884 }
885 ptb = &tb1->page_next[n1];
886 }
887 }
888
889 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
890 {
891 TranslationBlock *tb1, **ptb;
892 unsigned int n1;
893
894 ptb = &tb->jmp_next[n];
895 tb1 = *ptb;
896 if (tb1) {
897 /* find tb(n) in circular list */
898 for(;;) {
899 tb1 = *ptb;
900 n1 = (uintptr_t)tb1 & 3;
901 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
902 if (n1 == n && tb1 == tb)
903 break;
904 if (n1 == 2) {
905 ptb = &tb1->jmp_first;
906 } else {
907 ptb = &tb1->jmp_next[n1];
908 }
909 }
910 /* now we can suppress tb(n) from the list */
911 *ptb = tb->jmp_next[n];
912
913 tb->jmp_next[n] = NULL;
914 }
915 }
916
917 /* reset the jump entry 'n' of a TB so that it is not chained to
918 another TB */
919 static inline void tb_reset_jump(TranslationBlock *tb, int n)
920 {
921 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
922 }
923
924 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
925 {
926 CPUArchState *env;
927 PageDesc *p;
928 unsigned int h, n1;
929 tb_page_addr_t phys_pc;
930 TranslationBlock *tb1, *tb2;
931
932 /* remove the TB from the hash list */
933 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
934 h = tb_phys_hash_func(phys_pc);
935 tb_remove(&tb_phys_hash[h], tb,
936 offsetof(TranslationBlock, phys_hash_next));
937
938 /* remove the TB from the page list */
939 if (tb->page_addr[0] != page_addr) {
940 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
941 tb_page_remove(&p->first_tb, tb);
942 invalidate_page_bitmap(p);
943 }
944 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
945 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
946 tb_page_remove(&p->first_tb, tb);
947 invalidate_page_bitmap(p);
948 }
949
950 tb_invalidated_flag = 1;
951
952 /* remove the TB from the hash list */
953 h = tb_jmp_cache_hash_func(tb->pc);
954 for(env = first_cpu; env != NULL; env = env->next_cpu) {
955 if (env->tb_jmp_cache[h] == tb)
956 env->tb_jmp_cache[h] = NULL;
957 }
958
959 /* suppress this TB from the two jump lists */
960 tb_jmp_remove(tb, 0);
961 tb_jmp_remove(tb, 1);
962
963 /* suppress any remaining jumps to this TB */
964 tb1 = tb->jmp_first;
965 for(;;) {
966 n1 = (uintptr_t)tb1 & 3;
967 if (n1 == 2)
968 break;
969 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
970 tb2 = tb1->jmp_next[n1];
971 tb_reset_jump(tb1, n1);
972 tb1->jmp_next[n1] = NULL;
973 tb1 = tb2;
974 }
975 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
976
977 tb_phys_invalidate_count++;
978 }
979
980 static inline void set_bits(uint8_t *tab, int start, int len)
981 {
982 int end, mask, end1;
983
984 end = start + len;
985 tab += start >> 3;
986 mask = 0xff << (start & 7);
987 if ((start & ~7) == (end & ~7)) {
988 if (start < end) {
989 mask &= ~(0xff << (end & 7));
990 *tab |= mask;
991 }
992 } else {
993 *tab++ |= mask;
994 start = (start + 8) & ~7;
995 end1 = end & ~7;
996 while (start < end1) {
997 *tab++ = 0xff;
998 start += 8;
999 }
1000 if (start < end) {
1001 mask = ~(0xff << (end & 7));
1002 *tab |= mask;
1003 }
1004 }
1005 }
1006
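/* Worked example: set_bits(tab, 10, 5) marks bits 10..14.  start and end
   fall in the same byte (tab[1]); mask starts as 0xff << 2 == 0xfc, is
   trimmed by ~(0xff << 7) to 0x7c, and tab[1] |= 0x7c sets exactly bits
   2..6 of that byte. */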
1007 static void build_page_bitmap(PageDesc *p)
1008 {
1009 int n, tb_start, tb_end;
1010 TranslationBlock *tb;
1011
1012 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1013
1014 tb = p->first_tb;
1015 while (tb != NULL) {
1016 n = (uintptr_t)tb & 3;
1017 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1018 /* NOTE: this is subtle as a TB may span two physical pages */
1019 if (n == 0) {
1020 /* NOTE: tb_end may be after the end of the page, but
1021 it is not a problem */
1022 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1023 tb_end = tb_start + tb->size;
1024 if (tb_end > TARGET_PAGE_SIZE)
1025 tb_end = TARGET_PAGE_SIZE;
1026 } else {
1027 tb_start = 0;
1028 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1029 }
1030 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1031 tb = tb->page_next[n];
1032 }
1033 }
1034
1035 TranslationBlock *tb_gen_code(CPUArchState *env,
1036 target_ulong pc, target_ulong cs_base,
1037 int flags, int cflags)
1038 {
1039 TranslationBlock *tb;
1040 uint8_t *tc_ptr;
1041 tb_page_addr_t phys_pc, phys_page2;
1042 target_ulong virt_page2;
1043 int code_gen_size;
1044
1045 phys_pc = get_page_addr_code(env, pc);
1046 tb = tb_alloc(pc);
1047 if (!tb) {
1048 /* flush must be done */
1049 tb_flush(env);
1050 /* cannot fail at this point */
1051 tb = tb_alloc(pc);
1052 /* Don't forget to invalidate previous TB info. */
1053 tb_invalidated_flag = 1;
1054 }
1055 tc_ptr = code_gen_ptr;
1056 tb->tc_ptr = tc_ptr;
1057 tb->cs_base = cs_base;
1058 tb->flags = flags;
1059 tb->cflags = cflags;
1060 cpu_gen_code(env, tb, &code_gen_size);
1061 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1062 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1063
1064 /* check next page if needed */
1065 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1066 phys_page2 = -1;
1067 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1068 phys_page2 = get_page_addr_code(env, virt_page2);
1069 }
1070 tb_link_page(tb, phys_pc, phys_page2);
1071 return tb;
1072 }
1073
1074 /*
1075 * Invalidate all TBs which intersect with the target physical address range
1076 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1077 * 'is_cpu_write_access' should be true if called from a real cpu write
1078 * access: the virtual CPU will exit the current TB if code is modified inside
1079 * this TB.
1080 */
1081 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1082 int is_cpu_write_access)
1083 {
1084 while (start < end) {
1085 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1086 start &= TARGET_PAGE_MASK;
1087 start += TARGET_PAGE_SIZE;
1088 }
1089 }
1090
1091 /*
1092 * Invalidate all TBs which intersect with the target physical address range
1093 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1094 * 'is_cpu_write_access' should be true if called from a real cpu write
1095 * access: the virtual CPU will exit the current TB if code is modified inside
1096 * this TB.
1097 */
1098 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1099 int is_cpu_write_access)
1100 {
1101 TranslationBlock *tb, *tb_next, *saved_tb;
1102 CPUArchState *env = cpu_single_env;
1103 tb_page_addr_t tb_start, tb_end;
1104 PageDesc *p;
1105 int n;
1106 #ifdef TARGET_HAS_PRECISE_SMC
1107 int current_tb_not_found = is_cpu_write_access;
1108 TranslationBlock *current_tb = NULL;
1109 int current_tb_modified = 0;
1110 target_ulong current_pc = 0;
1111 target_ulong current_cs_base = 0;
1112 int current_flags = 0;
1113 #endif /* TARGET_HAS_PRECISE_SMC */
1114
1115 p = page_find(start >> TARGET_PAGE_BITS);
1116 if (!p)
1117 return;
1118 if (!p->code_bitmap &&
1119 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1120 is_cpu_write_access) {
1121 /* build code bitmap */
1122 build_page_bitmap(p);
1123 }
1124
1125 /* we remove all the TBs in the range [start, end[ */
1126 /* XXX: see if in some cases it could be faster to invalidate all the code */
1127 tb = p->first_tb;
1128 while (tb != NULL) {
1129 n = (uintptr_t)tb & 3;
1130 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1131 tb_next = tb->page_next[n];
1132 /* NOTE: this is subtle as a TB may span two physical pages */
1133 if (n == 0) {
1134 /* NOTE: tb_end may be after the end of the page, but
1135 it is not a problem */
1136 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1137 tb_end = tb_start + tb->size;
1138 } else {
1139 tb_start = tb->page_addr[1];
1140 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1141 }
1142 if (!(tb_end <= start || tb_start >= end)) {
1143 #ifdef TARGET_HAS_PRECISE_SMC
1144 if (current_tb_not_found) {
1145 current_tb_not_found = 0;
1146 current_tb = NULL;
1147 if (env->mem_io_pc) {
1148 /* now we have a real cpu fault */
1149 current_tb = tb_find_pc(env->mem_io_pc);
1150 }
1151 }
1152 if (current_tb == tb &&
1153 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1154 /* If we are modifying the current TB, we must stop
1155 its execution. We could be more precise by checking
1156 that the modification is after the current PC, but it
1157 would require a specialized function to partially
1158 restore the CPU state */
1159
1160 current_tb_modified = 1;
1161 cpu_restore_state(current_tb, env, env->mem_io_pc);
1162 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1163 &current_flags);
1164 }
1165 #endif /* TARGET_HAS_PRECISE_SMC */
1166 /* we need to do that to handle the case where a signal
1167 occurs while doing tb_phys_invalidate() */
1168 saved_tb = NULL;
1169 if (env) {
1170 saved_tb = env->current_tb;
1171 env->current_tb = NULL;
1172 }
1173 tb_phys_invalidate(tb, -1);
1174 if (env) {
1175 env->current_tb = saved_tb;
1176 if (env->interrupt_request && env->current_tb)
1177 cpu_interrupt(env, env->interrupt_request);
1178 }
1179 }
1180 tb = tb_next;
1181 }
1182 #if !defined(CONFIG_USER_ONLY)
1183 /* if no code remaining, no need to continue to use slow writes */
1184 if (!p->first_tb) {
1185 invalidate_page_bitmap(p);
1186 if (is_cpu_write_access) {
1187 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1188 }
1189 }
1190 #endif
1191 #ifdef TARGET_HAS_PRECISE_SMC
1192 if (current_tb_modified) {
1193 /* we generate a block containing just the instruction
1194 modifying the memory. It will ensure that it cannot modify
1195 itself */
1196 env->current_tb = NULL;
1197 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1198 cpu_resume_from_signal(env, NULL);
1199 }
1200 #endif
1201 }
1202
1203 /* len must be <= 8 and start must be a multiple of len */
1204 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1205 {
1206 PageDesc *p;
1207 int offset, b;
1208 #if 0
1209 if (1) {
1210 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1211 cpu_single_env->mem_io_vaddr, len,
1212 cpu_single_env->eip,
1213 cpu_single_env->eip +
1214 (intptr_t)cpu_single_env->segs[R_CS].base);
1215 }
1216 #endif
1217 p = page_find(start >> TARGET_PAGE_BITS);
1218 if (!p)
1219 return;
1220 if (p->code_bitmap) {
1221 offset = start & ~TARGET_PAGE_MASK;
1222 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1223 if (b & ((1 << len) - 1))
1224 goto do_invalidate;
1225 } else {
1226 do_invalidate:
1227 tb_invalidate_phys_page_range(start, start + len, 1);
1228 }
1229 }
1230
1231 #if !defined(CONFIG_SOFTMMU)
1232 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1233 uintptr_t pc, void *puc)
1234 {
1235 TranslationBlock *tb;
1236 PageDesc *p;
1237 int n;
1238 #ifdef TARGET_HAS_PRECISE_SMC
1239 TranslationBlock *current_tb = NULL;
1240 CPUArchState *env = cpu_single_env;
1241 int current_tb_modified = 0;
1242 target_ulong current_pc = 0;
1243 target_ulong current_cs_base = 0;
1244 int current_flags = 0;
1245 #endif
1246
1247 addr &= TARGET_PAGE_MASK;
1248 p = page_find(addr >> TARGET_PAGE_BITS);
1249 if (!p)
1250 return;
1251 tb = p->first_tb;
1252 #ifdef TARGET_HAS_PRECISE_SMC
1253 if (tb && pc != 0) {
1254 current_tb = tb_find_pc(pc);
1255 }
1256 #endif
1257 while (tb != NULL) {
1258 n = (uintptr_t)tb & 3;
1259 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1260 #ifdef TARGET_HAS_PRECISE_SMC
1261 if (current_tb == tb &&
1262 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1263 /* If we are modifying the current TB, we must stop
1264 its execution. We could be more precise by checking
1265 that the modification is after the current PC, but it
1266 would require a specialized function to partially
1267 restore the CPU state */
1268
1269 current_tb_modified = 1;
1270 cpu_restore_state(current_tb, env, pc);
1271 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1272 &current_flags);
1273 }
1274 #endif /* TARGET_HAS_PRECISE_SMC */
1275 tb_phys_invalidate(tb, addr);
1276 tb = tb->page_next[n];
1277 }
1278 p->first_tb = NULL;
1279 #ifdef TARGET_HAS_PRECISE_SMC
1280 if (current_tb_modified) {
1281 /* we generate a block containing just the instruction
1282 modifying the memory. It will ensure that it cannot modify
1283 itself */
1284 env->current_tb = NULL;
1285 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1286 cpu_resume_from_signal(env, puc);
1287 }
1288 #endif
1289 }
1290 #endif
1291
1292 /* add the tb in the target page and protect it if necessary */
1293 static inline void tb_alloc_page(TranslationBlock *tb,
1294 unsigned int n, tb_page_addr_t page_addr)
1295 {
1296 PageDesc *p;
1297 #ifndef CONFIG_USER_ONLY
1298 bool page_already_protected;
1299 #endif
1300
1301 tb->page_addr[n] = page_addr;
1302 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1303 tb->page_next[n] = p->first_tb;
1304 #ifndef CONFIG_USER_ONLY
1305 page_already_protected = p->first_tb != NULL;
1306 #endif
1307 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1308 invalidate_page_bitmap(p);
1309
1310 #if defined(TARGET_HAS_SMC) || 1
1311
1312 #if defined(CONFIG_USER_ONLY)
1313 if (p->flags & PAGE_WRITE) {
1314 target_ulong addr;
1315 PageDesc *p2;
1316 int prot;
1317
1318 /* force the host page as non-writable (writes will have a
1319 page fault + mprotect overhead) */
1320 page_addr &= qemu_host_page_mask;
1321 prot = 0;
1322 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1323 addr += TARGET_PAGE_SIZE) {
1324
1325 p2 = page_find (addr >> TARGET_PAGE_BITS);
1326 if (!p2)
1327 continue;
1328 prot |= p2->flags;
1329 p2->flags &= ~PAGE_WRITE;
1330 }
1331 mprotect(g2h(page_addr), qemu_host_page_size,
1332 (prot & PAGE_BITS) & ~PAGE_WRITE);
1333 #ifdef DEBUG_TB_INVALIDATE
1334 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1335 page_addr);
1336 #endif
1337 }
1338 #else
1339 /* if some code is already present, then the pages are already
1340 protected. So we handle the case where only the first TB is
1341 allocated in a physical page */
1342 if (!page_already_protected) {
1343 tlb_protect_code(page_addr);
1344 }
1345 #endif
1346
1347 #endif /* TARGET_HAS_SMC */
1348 }
1349
1350 /* add a new TB and link it to the physical page tables. phys_page2 is
1351 (-1) to indicate that only one page contains the TB. */
1352 void tb_link_page(TranslationBlock *tb,
1353 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1354 {
1355 unsigned int h;
1356 TranslationBlock **ptb;
1357
1358 /* Grab the mmap lock to stop another thread invalidating this TB
1359 before we are done. */
1360 mmap_lock();
1361 /* add in the physical hash table */
1362 h = tb_phys_hash_func(phys_pc);
1363 ptb = &tb_phys_hash[h];
1364 tb->phys_hash_next = *ptb;
1365 *ptb = tb;
1366
1367 /* add in the page list */
1368 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1369 if (phys_page2 != -1)
1370 tb_alloc_page(tb, 1, phys_page2);
1371 else
1372 tb->page_addr[1] = -1;
1373
1374 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1375 tb->jmp_next[0] = NULL;
1376 tb->jmp_next[1] = NULL;
1377
1378 /* init original jump addresses */
1379 if (tb->tb_next_offset[0] != 0xffff)
1380 tb_reset_jump(tb, 0);
1381 if (tb->tb_next_offset[1] != 0xffff)
1382 tb_reset_jump(tb, 1);
1383
1384 #ifdef DEBUG_TB_CHECK
1385 tb_page_check();
1386 #endif
1387 mmap_unlock();
1388 }
1389
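/* Taken together with tb_gen_code() above, the life cycle of a block is:
   tb_alloc() reserves a TranslationBlock, cpu_gen_code() emits the host code
   at code_gen_ptr, and tb_link_page() publishes the TB in tb_phys_hash[] and
   in the per-page lists so that later writes to either guest page can
   invalidate it. */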
1390 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1391 tb[1].tc_ptr. Return NULL if not found */
1392 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1393 {
1394 int m_min, m_max, m;
1395 uintptr_t v;
1396 TranslationBlock *tb;
1397
1398 if (nb_tbs <= 0)
1399 return NULL;
1400 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1401 tc_ptr >= (uintptr_t)code_gen_ptr) {
1402 return NULL;
1403 }
1404 /* binary search (cf Knuth) */
1405 m_min = 0;
1406 m_max = nb_tbs - 1;
1407 while (m_min <= m_max) {
1408 m = (m_min + m_max) >> 1;
1409 tb = &tbs[m];
1410 v = (uintptr_t)tb->tc_ptr;
1411 if (v == tc_ptr)
1412 return tb;
1413 else if (tc_ptr < v) {
1414 m_max = m - 1;
1415 } else {
1416 m_min = m + 1;
1417 }
1418 }
1419 return &tbs[m_max];
1420 }
1421
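/* Put differently: given a host PC inside generated code, the binary search
   above returns the TB with the greatest tc_ptr that is <= tc_ptr, i.e. the
   block containing that PC; tbs[] is naturally sorted by tc_ptr because
   code_gen_ptr only moves forward between flushes. */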
1422 static void tb_reset_jump_recursive(TranslationBlock *tb);
1423
1424 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1425 {
1426 TranslationBlock *tb1, *tb_next, **ptb;
1427 unsigned int n1;
1428
1429 tb1 = tb->jmp_next[n];
1430 if (tb1 != NULL) {
1431 /* find head of list */
1432 for(;;) {
1433 n1 = (uintptr_t)tb1 & 3;
1434 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1435 if (n1 == 2)
1436 break;
1437 tb1 = tb1->jmp_next[n1];
1438 }
1439 /* we are now sure that tb jumps to tb1 */
1440 tb_next = tb1;
1441
1442 /* remove tb from the jmp_first list */
1443 ptb = &tb_next->jmp_first;
1444 for(;;) {
1445 tb1 = *ptb;
1446 n1 = (uintptr_t)tb1 & 3;
1447 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1448 if (n1 == n && tb1 == tb)
1449 break;
1450 ptb = &tb1->jmp_next[n1];
1451 }
1452 *ptb = tb->jmp_next[n];
1453 tb->jmp_next[n] = NULL;
1454
1455 /* suppress the jump to next tb in generated code */
1456 tb_reset_jump(tb, n);
1457
1458 /* suppress jumps in the tb on which we could have jumped */
1459 tb_reset_jump_recursive(tb_next);
1460 }
1461 }
1462
1463 static void tb_reset_jump_recursive(TranslationBlock *tb)
1464 {
1465 tb_reset_jump_recursive2(tb, 0);
1466 tb_reset_jump_recursive2(tb, 1);
1467 }
1468
1469 #if defined(TARGET_HAS_ICE)
1470 #if defined(CONFIG_USER_ONLY)
1471 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1472 {
1473 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1474 }
1475 #else
1476 void tb_invalidate_phys_addr(hwaddr addr)
1477 {
1478 ram_addr_t ram_addr;
1479 MemoryRegionSection *section;
1480
1481 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1482 if (!(memory_region_is_ram(section->mr)
1483 || (section->mr->rom_device && section->mr->readable))) {
1484 return;
1485 }
1486 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1487 + memory_region_section_addr(section, addr);
1488 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1489 }
1490
1491 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1492 {
1493 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1494 (pc & ~TARGET_PAGE_MASK));
1495 }
1496 #endif
1497 #endif /* TARGET_HAS_ICE */
1498
1499 #if defined(CONFIG_USER_ONLY)
1500 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1501
1502 {
1503 }
1504
1505 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1506 int flags, CPUWatchpoint **watchpoint)
1507 {
1508 return -ENOSYS;
1509 }
1510 #else
1511 /* Add a watchpoint. */
1512 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1513 int flags, CPUWatchpoint **watchpoint)
1514 {
1515 target_ulong len_mask = ~(len - 1);
1516 CPUWatchpoint *wp;
1517
1518 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1519 if ((len & (len - 1)) || (addr & ~len_mask) ||
1520 len == 0 || len > TARGET_PAGE_SIZE) {
1521 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1522 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1523 return -EINVAL;
1524 }
1525 wp = g_malloc(sizeof(*wp));
1526
1527 wp->vaddr = addr;
1528 wp->len_mask = len_mask;
1529 wp->flags = flags;
1530
1531 /* keep all GDB-injected watchpoints in front */
1532 if (flags & BP_GDB)
1533 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1534 else
1535 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1536
1537 tlb_flush_page(env, addr);
1538
1539 if (watchpoint)
1540 *watchpoint = wp;
1541 return 0;
1542 }
1543
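/* Minimal usage sketch (hypothetical values; the flags combine a BP_GDB or
   BP_CPU owner bit with a BP_MEM_* access type, as used elsewhere in this
   file).  len must be a power of two no larger than TARGET_PAGE_SIZE and
   addr must be aligned to it, otherwise -EINVAL is returned:

       CPUWatchpoint *wp;
       cpu_watchpoint_insert(env, addr, 4, BP_GDB | BP_MEM_WRITE, &wp);
*/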
1544 /* Remove a specific watchpoint. */
1545 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1546 int flags)
1547 {
1548 target_ulong len_mask = ~(len - 1);
1549 CPUWatchpoint *wp;
1550
1551 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1552 if (addr == wp->vaddr && len_mask == wp->len_mask
1553 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1554 cpu_watchpoint_remove_by_ref(env, wp);
1555 return 0;
1556 }
1557 }
1558 return -ENOENT;
1559 }
1560
1561 /* Remove a specific watchpoint by reference. */
1562 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1563 {
1564 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1565
1566 tlb_flush_page(env, watchpoint->vaddr);
1567
1568 g_free(watchpoint);
1569 }
1570
1571 /* Remove all matching watchpoints. */
1572 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1573 {
1574 CPUWatchpoint *wp, *next;
1575
1576 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1577 if (wp->flags & mask)
1578 cpu_watchpoint_remove_by_ref(env, wp);
1579 }
1580 }
1581 #endif
1582
1583 /* Add a breakpoint. */
1584 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1585 CPUBreakpoint **breakpoint)
1586 {
1587 #if defined(TARGET_HAS_ICE)
1588 CPUBreakpoint *bp;
1589
1590 bp = g_malloc(sizeof(*bp));
1591
1592 bp->pc = pc;
1593 bp->flags = flags;
1594
1595 /* keep all GDB-injected breakpoints in front */
1596 if (flags & BP_GDB)
1597 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1598 else
1599 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1600
1601 breakpoint_invalidate(env, pc);
1602
1603 if (breakpoint)
1604 *breakpoint = bp;
1605 return 0;
1606 #else
1607 return -ENOSYS;
1608 #endif
1609 }
1610
1611 /* Remove a specific breakpoint. */
1612 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1613 {
1614 #if defined(TARGET_HAS_ICE)
1615 CPUBreakpoint *bp;
1616
1617 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1618 if (bp->pc == pc && bp->flags == flags) {
1619 cpu_breakpoint_remove_by_ref(env, bp);
1620 return 0;
1621 }
1622 }
1623 return -ENOENT;
1624 #else
1625 return -ENOSYS;
1626 #endif
1627 }
1628
1629 /* Remove a specific breakpoint by reference. */
1630 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1631 {
1632 #if defined(TARGET_HAS_ICE)
1633 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1634
1635 breakpoint_invalidate(env, breakpoint->pc);
1636
1637 g_free(breakpoint);
1638 #endif
1639 }
1640
1641 /* Remove all matching breakpoints. */
1642 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1643 {
1644 #if defined(TARGET_HAS_ICE)
1645 CPUBreakpoint *bp, *next;
1646
1647 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1648 if (bp->flags & mask)
1649 cpu_breakpoint_remove_by_ref(env, bp);
1650 }
1651 #endif
1652 }
1653
1654 /* enable or disable single-step mode. EXCP_DEBUG is returned by the
1655 CPU loop after each instruction */
1656 void cpu_single_step(CPUArchState *env, int enabled)
1657 {
1658 #if defined(TARGET_HAS_ICE)
1659 if (env->singlestep_enabled != enabled) {
1660 env->singlestep_enabled = enabled;
1661 if (kvm_enabled())
1662 kvm_update_guest_debug(env, 0);
1663 else {
1664 /* must flush all the translated code to avoid inconsistencies */
1665 /* XXX: only flush what is necessary */
1666 tb_flush(env);
1667 }
1668 }
1669 #endif
1670 }
1671
1672 static void cpu_unlink_tb(CPUArchState *env)
1673 {
1674 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1675 problem and hope the cpu will stop of its own accord. For userspace
1676 emulation this often isn't actually as bad as it sounds. Often
1677 signals are used primarily to interrupt blocking syscalls. */
1678 TranslationBlock *tb;
1679 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1680
1681 spin_lock(&interrupt_lock);
1682 tb = env->current_tb;
1683 /* if the cpu is currently executing code, we must unlink it and
1684 all the potentially executing TB */
1685 if (tb) {
1686 env->current_tb = NULL;
1687 tb_reset_jump_recursive(tb);
1688 }
1689 spin_unlock(&interrupt_lock);
1690 }
1691
1692 #ifndef CONFIG_USER_ONLY
1693 /* mask must never be zero, except for A20 change call */
1694 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1695 {
1696 CPUState *cpu = ENV_GET_CPU(env);
1697 int old_mask;
1698
1699 old_mask = env->interrupt_request;
1700 env->interrupt_request |= mask;
1701
1702 /*
1703 * If called from iothread context, wake the target cpu in
1704 * case it's halted.
1705 */
1706 if (!qemu_cpu_is_self(cpu)) {
1707 qemu_cpu_kick(env);
1708 return;
1709 }
1710
1711 if (use_icount) {
1712 env->icount_decr.u16.high = 0xffff;
1713 if (!can_do_io(env)
1714 && (mask & ~old_mask) != 0) {
1715 cpu_abort(env, "Raised interrupt while not in I/O function");
1716 }
1717 } else {
1718 cpu_unlink_tb(env);
1719 }
1720 }
1721
1722 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1723
1724 #else /* CONFIG_USER_ONLY */
1725
1726 void cpu_interrupt(CPUArchState *env, int mask)
1727 {
1728 env->interrupt_request |= mask;
1729 cpu_unlink_tb(env);
1730 }
1731 #endif /* CONFIG_USER_ONLY */
1732
1733 void cpu_reset_interrupt(CPUArchState *env, int mask)
1734 {
1735 env->interrupt_request &= ~mask;
1736 }
1737
1738 void cpu_exit(CPUArchState *env)
1739 {
1740 env->exit_request = 1;
1741 cpu_unlink_tb(env);
1742 }
1743
1744 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1745 {
1746 va_list ap;
1747 va_list ap2;
1748
1749 va_start(ap, fmt);
1750 va_copy(ap2, ap);
1751 fprintf(stderr, "qemu: fatal: ");
1752 vfprintf(stderr, fmt, ap);
1753 fprintf(stderr, "\n");
1754 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1755 if (qemu_log_enabled()) {
1756 qemu_log("qemu: fatal: ");
1757 qemu_log_vprintf(fmt, ap2);
1758 qemu_log("\n");
1759 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1760 qemu_log_flush();
1761 qemu_log_close();
1762 }
1763 va_end(ap2);
1764 va_end(ap);
1765 #if defined(CONFIG_USER_ONLY)
1766 {
1767 struct sigaction act;
1768 sigfillset(&act.sa_mask);
1769 act.sa_handler = SIG_DFL;
1770 sigaction(SIGABRT, &act, NULL);
1771 }
1772 #endif
1773 abort();
1774 }
1775
1776 CPUArchState *cpu_copy(CPUArchState *env)
1777 {
1778 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1779 CPUArchState *next_cpu = new_env->next_cpu;
1780 int cpu_index = new_env->cpu_index;
1781 #if defined(TARGET_HAS_ICE)
1782 CPUBreakpoint *bp;
1783 CPUWatchpoint *wp;
1784 #endif
1785
1786 memcpy(new_env, env, sizeof(CPUArchState));
1787
1788 /* Preserve chaining and index. */
1789 new_env->next_cpu = next_cpu;
1790 new_env->cpu_index = cpu_index;
1791
1792 /* Clone all break/watchpoints.
1793 Note: Once we support ptrace with hw-debug register access, make sure
1794 BP_CPU break/watchpoints are handled correctly on clone. */
1795 QTAILQ_INIT(&env->breakpoints);
1796 QTAILQ_INIT(&env->watchpoints);
1797 #if defined(TARGET_HAS_ICE)
1798 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1799 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1800 }
1801 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1802 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1803 wp->flags, NULL);
1804 }
1805 #endif
1806
1807 return new_env;
1808 }
1809
1810 #if !defined(CONFIG_USER_ONLY)
1811 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1812 {
1813 unsigned int i;
1814
1815 /* Discard jump cache entries for any tb which might potentially
1816 overlap the flushed page. */
1817 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1818 memset (&env->tb_jmp_cache[i], 0,
1819 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1820
1821 i = tb_jmp_cache_hash_page(addr);
1822 memset (&env->tb_jmp_cache[i], 0,
1823 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1824 }
1825
1826 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1827 uintptr_t length)
1828 {
1829 uintptr_t start1;
1830
1831 /* we modify the TLB cache so that the dirty bit will be set again
1832 when accessing the range */
1833 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1834 /* Check that we don't span multiple blocks - this breaks the
1835 address comparisons below. */
1836 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1837 != (end - 1) - start) {
1838 abort();
1839 }
1840 cpu_tlb_reset_dirty_all(start1, length);
1841
1842 }
1843
1844 /* Note: start and end must be within the same ram block. */
1845 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1846 int dirty_flags)
1847 {
1848 uintptr_t length;
1849
1850 start &= TARGET_PAGE_MASK;
1851 end = TARGET_PAGE_ALIGN(end);
1852
1853 length = end - start;
1854 if (length == 0)
1855 return;
1856 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1857
1858 if (tcg_enabled()) {
1859 tlb_reset_dirty_range_all(start, end, length);
1860 }
1861 }
1862
1863 int cpu_physical_memory_set_dirty_tracking(int enable)
1864 {
1865 int ret = 0;
1866 in_migration = enable;
1867 return ret;
1868 }
1869
1870 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1871 MemoryRegionSection *section,
1872 target_ulong vaddr,
1873 hwaddr paddr,
1874 int prot,
1875 target_ulong *address)
1876 {
1877 hwaddr iotlb;
1878 CPUWatchpoint *wp;
1879
1880 if (memory_region_is_ram(section->mr)) {
1881 /* Normal RAM. */
1882 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1883 + memory_region_section_addr(section, paddr);
1884 if (!section->readonly) {
1885 iotlb |= phys_section_notdirty;
1886 } else {
1887 iotlb |= phys_section_rom;
1888 }
1889 } else {
1890 /* IO handlers are currently passed a physical address.
1891 It would be nice to pass an offset from the base address
1892 of that region. This would avoid having to special case RAM,
1893 and avoid full address decoding in every device.
1894 We can't use the high bits of pd for this because
1895 IO_MEM_ROMD uses these as a ram address. */
1896 iotlb = section - phys_sections;
1897 iotlb += memory_region_section_addr(section, paddr);
1898 }
1899
1900 /* Make accesses to pages with watchpoints go via the
1901 watchpoint trap routines. */
1902 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1903 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1904 /* Avoid trapping reads of pages with a write breakpoint. */
1905 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1906 iotlb = phys_section_watch + paddr;
1907 *address |= TLB_MMIO;
1908 break;
1909 }
1910 }
1911 }
1912
1913 return iotlb;
1914 }
1915
1916 #else
1917 /*
1918 * Walks guest process memory "regions" one by one
1919 * and calls callback function 'fn' for each region.
1920 */
1921
1922 struct walk_memory_regions_data
1923 {
1924 walk_memory_regions_fn fn;
1925 void *priv;
1926 uintptr_t start;
1927 int prot;
1928 };
1929
1930 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1931 abi_ulong end, int new_prot)
1932 {
1933 if (data->start != -1ul) {
1934 int rc = data->fn(data->priv, data->start, end, data->prot);
1935 if (rc != 0) {
1936 return rc;
1937 }
1938 }
1939
1940 data->start = (new_prot ? end : -1ul);
1941 data->prot = new_prot;
1942
1943 return 0;
1944 }
1945
1946 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1947 abi_ulong base, int level, void **lp)
1948 {
1949 abi_ulong pa;
1950 int i, rc;
1951
1952 if (*lp == NULL) {
1953 return walk_memory_regions_end(data, base, 0);
1954 }
1955
1956 if (level == 0) {
1957 PageDesc *pd = *lp;
1958 for (i = 0; i < L2_SIZE; ++i) {
1959 int prot = pd[i].flags;
1960
1961 pa = base | (i << TARGET_PAGE_BITS);
1962 if (prot != data->prot) {
1963 rc = walk_memory_regions_end(data, pa, prot);
1964 if (rc != 0) {
1965 return rc;
1966 }
1967 }
1968 }
1969 } else {
1970 void **pp = *lp;
1971 for (i = 0; i < L2_SIZE; ++i) {
1972 pa = base | ((abi_ulong)i <<
1973 (TARGET_PAGE_BITS + L2_BITS * level));
1974 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1975 if (rc != 0) {
1976 return rc;
1977 }
1978 }
1979 }
1980
1981 return 0;
1982 }
1983
1984 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1985 {
1986 struct walk_memory_regions_data data;
1987 uintptr_t i;
1988
1989 data.fn = fn;
1990 data.priv = priv;
1991 data.start = -1ul;
1992 data.prot = 0;
1993
1994 for (i = 0; i < V_L1_SIZE; i++) {
1995 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1996 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
1997 if (rc != 0) {
1998 return rc;
1999 }
2000 }
2001
2002 return walk_memory_regions_end(&data, 0, 0);
2003 }
2004
2005 static int dump_region(void *priv, abi_ulong start,
2006 abi_ulong end, unsigned long prot)
2007 {
2008 FILE *f = (FILE *)priv;
2009
2010 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2011 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2012 start, end, end - start,
2013 ((prot & PAGE_READ) ? 'r' : '-'),
2014 ((prot & PAGE_WRITE) ? 'w' : '-'),
2015 ((prot & PAGE_EXEC) ? 'x' : '-'));
2016
2017 return (0);
2018 }
2019
2020 /* dump memory mappings */
2021 void page_dump(FILE *f)
2022 {
2023 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2024 "start", "end", "size", "prot");
2025 walk_memory_regions(f, dump_region);
2026 }
2027
2028 int page_get_flags(target_ulong address)
2029 {
2030 PageDesc *p;
2031
2032 p = page_find(address >> TARGET_PAGE_BITS);
2033 if (!p)
2034 return 0;
2035 return p->flags;
2036 }
2037
2038 /* Modify the flags of a page and invalidate the code if necessary.
2039    The flag PAGE_WRITE_ORG is set automatically depending
2040 on PAGE_WRITE. The mmap_lock should already be held. */
2041 void page_set_flags(target_ulong start, target_ulong end, int flags)
2042 {
2043 target_ulong addr, len;
2044
2045 /* This function should never be called with addresses outside the
2046 guest address space. If this assert fires, it probably indicates
2047 a missing call to h2g_valid. */
2048 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2049 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2050 #endif
2051 assert(start < end);
2052
2053 start = start & TARGET_PAGE_MASK;
2054 end = TARGET_PAGE_ALIGN(end);
2055
2056 if (flags & PAGE_WRITE) {
2057 flags |= PAGE_WRITE_ORG;
2058 }
2059
2060 for (addr = start, len = end - start;
2061 len != 0;
2062 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2063 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2064
2065 /* If the write protection bit is set, then we invalidate
2066 the code inside. */
2067 if (!(p->flags & PAGE_WRITE) &&
2068 (flags & PAGE_WRITE) &&
2069 p->first_tb) {
2070 tb_invalidate_phys_page(addr, 0, NULL);
2071 }
2072 p->flags = flags;
2073 }
2074 }
2075
2076 int page_check_range(target_ulong start, target_ulong len, int flags)
2077 {
2078 PageDesc *p;
2079 target_ulong end;
2080 target_ulong addr;
2081
2082 /* This function should never be called with addresses outside the
2083 guest address space. If this assert fires, it probably indicates
2084 a missing call to h2g_valid. */
2085 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2086 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2087 #endif
2088
2089 if (len == 0) {
2090 return 0;
2091 }
2092 if (start + len - 1 < start) {
2093 /* We've wrapped around. */
2094 return -1;
2095 }
2096
2097 end = TARGET_PAGE_ALIGN(start + len); /* must do before we lose bits in the next step */
2098 start = start & TARGET_PAGE_MASK;
2099
2100 for (addr = start, len = end - start;
2101 len != 0;
2102 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2103 p = page_find(addr >> TARGET_PAGE_BITS);
2104 if (!p)
2105 return -1;
2106 if (!(p->flags & PAGE_VALID))
2107 return -1;
2108
2109 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2110 return -1;
2111 if (flags & PAGE_WRITE) {
2112 if (!(p->flags & PAGE_WRITE_ORG))
2113 return -1;
2114 /* unprotect the page if it was put read-only because it
2115 contains translated code */
2116 if (!(p->flags & PAGE_WRITE)) {
2117 if (!page_unprotect(addr, 0, NULL))
2118 return -1;
2119 }
2120 return 0;
2121 }
2122 }
2123 return 0;
2124 }
2125
2126 /* called from signal handler: invalidate the code and unprotect the
2127 page. Return TRUE if the fault was successfully handled. */
2128 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2129 {
2130 unsigned int prot;
2131 PageDesc *p;
2132 target_ulong host_start, host_end, addr;
2133
2134 /* Technically this isn't safe inside a signal handler. However we
2135 know this only ever happens in a synchronous SEGV handler, so in
2136 practice it seems to be ok. */
2137 mmap_lock();
2138
2139 p = page_find(address >> TARGET_PAGE_BITS);
2140 if (!p) {
2141 mmap_unlock();
2142 return 0;
2143 }
2144
2145 /* if the page was really writable, then we change its
2146 protection back to writable */
2147 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2148 host_start = address & qemu_host_page_mask;
2149 host_end = host_start + qemu_host_page_size;
2150
2151 prot = 0;
2152 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2153 p = page_find(addr >> TARGET_PAGE_BITS);
2154 p->flags |= PAGE_WRITE;
2155 prot |= p->flags;
2156
2157 /* and since the content will be modified, we must invalidate
2158 the corresponding translated code. */
2159 tb_invalidate_phys_page(addr, pc, puc);
2160 #ifdef DEBUG_TB_CHECK
2161 tb_invalidate_check(addr);
2162 #endif
2163 }
2164 mprotect((void *)g2h(host_start), qemu_host_page_size,
2165 prot & PAGE_BITS);
2166
2167 mmap_unlock();
2168 return 1;
2169 }
2170 mmap_unlock();
2171 return 0;
2172 }
2173 #endif /* defined(CONFIG_USER_ONLY) */
2174
2175 #if !defined(CONFIG_USER_ONLY)
2176
2177 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2178 typedef struct subpage_t {
2179 MemoryRegion iomem;
2180 hwaddr base;
2181 uint16_t sub_section[TARGET_PAGE_SIZE];
2182 } subpage_t;
2183
2184 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2185 uint16_t section);
2186 static subpage_t *subpage_init(hwaddr base);
2187 static void destroy_page_desc(uint16_t section_index)
2188 {
2189 MemoryRegionSection *section = &phys_sections[section_index];
2190 MemoryRegion *mr = section->mr;
2191
2192 if (mr->subpage) {
2193 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2194 memory_region_destroy(&subpage->iomem);
2195 g_free(subpage);
2196 }
2197 }
2198
2199 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2200 {
2201 unsigned i;
2202 PhysPageEntry *p;
2203
2204 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2205 return;
2206 }
2207
2208 p = phys_map_nodes[lp->ptr];
2209 for (i = 0; i < L2_SIZE; ++i) {
2210 if (!p[i].is_leaf) {
2211 destroy_l2_mapping(&p[i], level - 1);
2212 } else {
2213 destroy_page_desc(p[i].ptr);
2214 }
2215 }
2216 lp->is_leaf = 0;
2217 lp->ptr = PHYS_MAP_NODE_NIL;
2218 }
2219
2220 static void destroy_all_mappings(AddressSpaceDispatch *d)
2221 {
2222 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2223 phys_map_nodes_reset();
2224 }
2225
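/* Append a copy of *section to the phys_sections array, growing the array
   (doubling, with a minimum of 16 entries) as needed, and return the index
   of the new entry. */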
2226 static uint16_t phys_section_add(MemoryRegionSection *section)
2227 {
2228 if (phys_sections_nb == phys_sections_nb_alloc) {
2229 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2230 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2231 phys_sections_nb_alloc);
2232 }
2233 phys_sections[phys_sections_nb] = *section;
2234 return phys_sections_nb++;
2235 }
2236
2237 static void phys_sections_clear(void)
2238 {
2239 phys_sections_nb = 0;
2240 }
2241
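/* Register a section that does not cover a full target page: route it
   through a subpage_t so that several sections can share a single entry
   in the physical page table. */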
2242 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2243 {
2244 subpage_t *subpage;
2245 hwaddr base = section->offset_within_address_space
2246 & TARGET_PAGE_MASK;
2247 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2248 MemoryRegionSection subsection = {
2249 .offset_within_address_space = base,
2250 .size = TARGET_PAGE_SIZE,
2251 };
2252 hwaddr start, end;
2253
2254 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2255
2256 if (!(existing->mr->subpage)) {
2257 subpage = subpage_init(base);
2258 subsection.mr = &subpage->iomem;
2259 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2260 phys_section_add(&subsection));
2261 } else {
2262 subpage = container_of(existing->mr, subpage_t, iomem);
2263 }
2264 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2265 end = start + section->size - 1;
2266 subpage_register(subpage, start, end, phys_section_add(section));
2267 }
2268
2269
2270 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2271 {
2272 hwaddr start_addr = section->offset_within_address_space;
2273 ram_addr_t size = section->size;
2274 hwaddr addr;
2275 uint16_t section_index = phys_section_add(section);
2276
2277 assert(size);
2278
2279 addr = start_addr;
2280 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2281 section_index);
2282 }
2283
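/* Split the incoming section into an unaligned head, a run of full target
   pages and an unaligned tail; the head and tail go through subpages while
   the page-aligned middle is registered directly. */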
2284 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2285 {
2286 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2287 MemoryRegionSection now = *section, remain = *section;
2288
2289 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2290 || (now.size < TARGET_PAGE_SIZE)) {
2291 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2292 - now.offset_within_address_space,
2293 now.size);
2294 register_subpage(d, &now);
2295 remain.size -= now.size;
2296 remain.offset_within_address_space += now.size;
2297 remain.offset_within_region += now.size;
2298 }
2299 while (remain.size >= TARGET_PAGE_SIZE) {
2300 now = remain;
2301 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2302 now.size = TARGET_PAGE_SIZE;
2303 register_subpage(d, &now);
2304 } else {
2305 now.size &= TARGET_PAGE_MASK;
2306 register_multipage(d, &now);
2307 }
2308 remain.size -= now.size;
2309 remain.offset_within_address_space += now.size;
2310 remain.offset_within_region += now.size;
2311 }
2312 now = remain;
2313 if (now.size) {
2314 register_subpage(d, &now);
2315 }
2316 }
2317
2318 void qemu_flush_coalesced_mmio_buffer(void)
2319 {
2320 if (kvm_enabled())
2321 kvm_flush_coalesced_mmio_buffer();
2322 }
2323
2324 #if defined(__linux__) && !defined(TARGET_S390X)
2325
2326 #include <sys/vfs.h>
2327
2328 #define HUGETLBFS_MAGIC 0x958458f6
2329
2330 static long gethugepagesize(const char *path)
2331 {
2332 struct statfs fs;
2333 int ret;
2334
2335 do {
2336 ret = statfs(path, &fs);
2337 } while (ret != 0 && errno == EINTR);
2338
2339 if (ret != 0) {
2340 perror(path);
2341 return 0;
2342 }
2343
2344 if (fs.f_type != HUGETLBFS_MAGIC)
2345 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2346
2347 return fs.f_bsize;
2348 }
2349
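/* Allocate the RAM block from a file created under -mem-path (normally a
   hugetlbfs mount).  Returns NULL on failure so the caller can fall back
   to an anonymous allocation. */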
2350 static void *file_ram_alloc(RAMBlock *block,
2351 ram_addr_t memory,
2352 const char *path)
2353 {
2354 char *filename;
2355 void *area;
2356 int fd;
2357 #ifdef MAP_POPULATE
2358 int flags;
2359 #endif
2360 unsigned long hpagesize;
2361
2362 hpagesize = gethugepagesize(path);
2363 if (!hpagesize) {
2364 return NULL;
2365 }
2366
2367 if (memory < hpagesize) {
2368 return NULL;
2369 }
2370
2371 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2372 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2373 return NULL;
2374 }
2375
2376 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2377 return NULL;
2378 }
2379
2380 fd = mkstemp(filename);
2381 if (fd < 0) {
2382 perror("unable to create backing store for hugepages");
2383 free(filename);
2384 return NULL;
2385 }
2386 unlink(filename);
2387 free(filename);
2388
2389 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2390
2391 /*
2392 * ftruncate is not supported by hugetlbfs in older
2393 * hosts, so don't bother bailing out on errors.
2394 * If anything goes wrong with it under other filesystems,
2395 * mmap will fail.
2396 */
2397 if (ftruncate(fd, memory))
2398 perror("ftruncate");
2399
2400 #ifdef MAP_POPULATE
2401 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2402 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2403 * to sidestep this quirk.
2404 */
2405 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2406 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2407 #else
2408 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2409 #endif
2410 if (area == MAP_FAILED) {
2411 perror("file_ram_alloc: can't mmap RAM pages");
2412 close(fd);
2413 return (NULL);
2414 }
2415 block->fd = fd;
2416 return area;
2417 }
2418 #endif
2419
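/* Best-fit search: find the smallest gap between existing RAM blocks that
   can hold a new block of the requested size, and return its offset. */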
2420 static ram_addr_t find_ram_offset(ram_addr_t size)
2421 {
2422 RAMBlock *block, *next_block;
2423 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2424
2425 if (QLIST_EMPTY(&ram_list.blocks))
2426 return 0;
2427
2428 QLIST_FOREACH(block, &ram_list.blocks, next) {
2429 ram_addr_t end, next = RAM_ADDR_MAX;
2430
2431 end = block->offset + block->length;
2432
2433 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2434 if (next_block->offset >= end) {
2435 next = MIN(next, next_block->offset);
2436 }
2437 }
2438 if (next - end >= size && next - end < mingap) {
2439 offset = end;
2440 mingap = next - end;
2441 }
2442 }
2443
2444 if (offset == RAM_ADDR_MAX) {
2445 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2446 (uint64_t)size);
2447 abort();
2448 }
2449
2450 return offset;
2451 }
2452
2453 ram_addr_t last_ram_offset(void)
2454 {
2455 RAMBlock *block;
2456 ram_addr_t last = 0;
2457
2458 QLIST_FOREACH(block, &ram_list.blocks, next)
2459 last = MAX(last, block->offset + block->length);
2460
2461 return last;
2462 }
2463
2464 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2465 {
2466 int ret;
2467 QemuOpts *machine_opts;
2468
2469 /* Use MADV_DONTDUMP if the user doesn't want guest memory in the core dump */
2470 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2471 if (machine_opts &&
2472 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2473 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2474 if (ret) {
2475 perror("qemu_madvise");
2476 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2477 "but dump_guest_core=off specified\n");
2478 }
2479 }
2480 }
2481
2482 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2483 {
2484 RAMBlock *new_block, *block;
2485
2486 new_block = NULL;
2487 QLIST_FOREACH(block, &ram_list.blocks, next) {
2488 if (block->offset == addr) {
2489 new_block = block;
2490 break;
2491 }
2492 }
2493 assert(new_block);
2494 assert(!new_block->idstr[0]);
2495
2496 if (dev) {
2497 char *id = qdev_get_dev_path(dev);
2498 if (id) {
2499 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2500 g_free(id);
2501 }
2502 }
2503 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2504
2505 QLIST_FOREACH(block, &ram_list.blocks, next) {
2506 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2507 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2508 new_block->idstr);
2509 abort();
2510 }
2511 }
2512 }
2513
2514 static int memory_try_enable_merging(void *addr, size_t len)
2515 {
2516 QemuOpts *opts;
2517
2518 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2519 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2520 /* disabled by the user */
2521 return 0;
2522 }
2523
2524 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2525 }
2526
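/* Allocate a new RAM block of the given size.  If host is non-NULL the
   caller provides the backing memory; otherwise it is allocated here
   (from -mem-path, Xen, KVM or plain qemu_vmalloc as appropriate). */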
2527 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2528 MemoryRegion *mr)
2529 {
2530 RAMBlock *new_block;
2531
2532 size = TARGET_PAGE_ALIGN(size);
2533 new_block = g_malloc0(sizeof(*new_block));
2534
2535 new_block->mr = mr;
2536 new_block->offset = find_ram_offset(size);
2537 if (host) {
2538 new_block->host = host;
2539 new_block->flags |= RAM_PREALLOC_MASK;
2540 } else {
2541 if (mem_path) {
2542 #if defined (__linux__) && !defined(TARGET_S390X)
2543 new_block->host = file_ram_alloc(new_block, size, mem_path);
2544 if (!new_block->host) {
2545 new_block->host = qemu_vmalloc(size);
2546 memory_try_enable_merging(new_block->host, size);
2547 }
2548 #else
2549 fprintf(stderr, "-mem-path option unsupported\n");
2550 exit(1);
2551 #endif
2552 } else {
2553 if (xen_enabled()) {
2554 xen_ram_alloc(new_block->offset, size, mr);
2555 } else if (kvm_enabled()) {
2556 /* some s390/kvm configurations have special constraints */
2557 new_block->host = kvm_vmalloc(size);
2558 } else {
2559 new_block->host = qemu_vmalloc(size);
2560 }
2561 memory_try_enable_merging(new_block->host, size);
2562 }
2563 }
2564 new_block->length = size;
2565
2566 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2567
2568 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2569 last_ram_offset() >> TARGET_PAGE_BITS);
2570 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2571 0, size >> TARGET_PAGE_BITS);
2572 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2573
2574 qemu_ram_setup_dump(new_block->host, size);
2575 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2576
2577 if (kvm_enabled())
2578 kvm_setup_guest_memory(new_block->host, size);
2579
2580 return new_block->offset;
2581 }
2582
2583 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2584 {
2585 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2586 }
2587
2588 void qemu_ram_free_from_ptr(ram_addr_t addr)
2589 {
2590 RAMBlock *block;
2591
2592 QLIST_FOREACH(block, &ram_list.blocks, next) {
2593 if (addr == block->offset) {
2594 QLIST_REMOVE(block, next);
2595 g_free(block);
2596 return;
2597 }
2598 }
2599 }
2600
2601 void qemu_ram_free(ram_addr_t addr)
2602 {
2603 RAMBlock *block;
2604
2605 QLIST_FOREACH(block, &ram_list.blocks, next) {
2606 if (addr == block->offset) {
2607 QLIST_REMOVE(block, next);
2608 if (block->flags & RAM_PREALLOC_MASK) {
2609 ;
2610 } else if (mem_path) {
2611 #if defined (__linux__) && !defined(TARGET_S390X)
2612 if (block->fd) {
2613 munmap(block->host, block->length);
2614 close(block->fd);
2615 } else {
2616 qemu_vfree(block->host);
2617 }
2618 #else
2619 abort();
2620 #endif
2621 } else {
2622 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2623 munmap(block->host, block->length);
2624 #else
2625 if (xen_enabled()) {
2626 xen_invalidate_map_cache_entry(block->host);
2627 } else {
2628 qemu_vfree(block->host);
2629 }
2630 #endif
2631 }
2632 g_free(block);
2633 return;
2634 }
2635 }
2636
2637 }
2638
2639 #ifndef _WIN32
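/* Recreate the host mapping for a range of guest RAM at its original
   virtual address, using the same kind of backing (file or anonymous)
   that was used for the initial allocation. */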
2640 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2641 {
2642 RAMBlock *block;
2643 ram_addr_t offset;
2644 int flags;
2645 void *area, *vaddr;
2646
2647 QLIST_FOREACH(block, &ram_list.blocks, next) {
2648 offset = addr - block->offset;
2649 if (offset < block->length) {
2650 vaddr = block->host + offset;
2651 if (block->flags & RAM_PREALLOC_MASK) {
2652 ;
2653 } else {
2654 flags = MAP_FIXED;
2655 munmap(vaddr, length);
2656 if (mem_path) {
2657 #if defined(__linux__) && !defined(TARGET_S390X)
2658 if (block->fd) {
2659 #ifdef MAP_POPULATE
2660 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2661 MAP_PRIVATE;
2662 #else
2663 flags |= MAP_PRIVATE;
2664 #endif
2665 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2666 flags, block->fd, offset);
2667 } else {
2668 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2669 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2670 flags, -1, 0);
2671 }
2672 #else
2673 abort();
2674 #endif
2675 } else {
2676 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2677 flags |= MAP_SHARED | MAP_ANONYMOUS;
2678 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2679 flags, -1, 0);
2680 #else
2681 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2682 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2683 flags, -1, 0);
2684 #endif
2685 }
2686 if (area != vaddr) {
2687 fprintf(stderr, "Could not remap addr: "
2688 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2689 length, addr);
2690 exit(1);
2691 }
2692 memory_try_enable_merging(vaddr, length);
2693 qemu_ram_setup_dump(vaddr, length);
2694 }
2695 return;
2696 }
2697 }
2698 }
2699 #endif /* !_WIN32 */
2700
2701 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2702 With the exception of the softmmu code in this file, this should
2703 only be used for local memory (e.g. video ram) that the device owns,
2704 and knows it isn't going to access beyond the end of the block.
2705
2706 It should not be used for general purpose DMA.
2707 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2708 */
2709 void *qemu_get_ram_ptr(ram_addr_t addr)
2710 {
2711 RAMBlock *block;
2712
2713 QLIST_FOREACH(block, &ram_list.blocks, next) {
2714 if (addr - block->offset < block->length) {
2715 /* Move this entry to the start of the list. */
2716 if (block != QLIST_FIRST(&ram_list.blocks)) {
2717 QLIST_REMOVE(block, next);
2718 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2719 }
2720 if (xen_enabled()) {
2721 /* We need to check if the requested address is in the RAM
2722 * because we don't want to map the entire memory in QEMU.
2723 * In that case just map until the end of the page.
2724 */
2725 if (block->offset == 0) {
2726 return xen_map_cache(addr, 0, 0);
2727 } else if (block->host == NULL) {
2728 block->host =
2729 xen_map_cache(block->offset, block->length, 1);
2730 }
2731 }
2732 return block->host + (addr - block->offset);
2733 }
2734 }
2735
2736 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2737 abort();
2738
2739 return NULL;
2740 }
2741
2742 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2743 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2744 */
2745 void *qemu_safe_ram_ptr(ram_addr_t addr)
2746 {
2747 RAMBlock *block;
2748
2749 QLIST_FOREACH(block, &ram_list.blocks, next) {
2750 if (addr - block->offset < block->length) {
2751 if (xen_enabled()) {
2752 /* We need to check if the requested address is in the RAM
2753 * because we don't want to map the entire memory in QEMU.
2754 * In that case just map until the end of the page.
2755 */
2756 if (block->offset == 0) {
2757 return xen_map_cache(addr, 0, 0);
2758 } else if (block->host == NULL) {
2759 block->host =
2760 xen_map_cache(block->offset, block->length, 1);
2761 }
2762 }
2763 return block->host + (addr - block->offset);
2764 }
2765 }
2766
2767 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2768 abort();
2769
2770 return NULL;
2771 }
2772
2773 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2774 * but takes a size argument */
2775 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2776 {
2777 if (*size == 0) {
2778 return NULL;
2779 }
2780 if (xen_enabled()) {
2781 return xen_map_cache(addr, *size, 1);
2782 } else {
2783 RAMBlock *block;
2784
2785 QLIST_FOREACH(block, &ram_list.blocks, next) {
2786 if (addr - block->offset < block->length) {
2787 if (addr - block->offset + *size > block->length)
2788 *size = block->length - addr + block->offset;
2789 return block->host + (addr - block->offset);
2790 }
2791 }
2792
2793 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2794 abort();
2795 }
2796 }
2797
2798 void qemu_put_ram_ptr(void *addr)
2799 {
2800 trace_qemu_put_ram_ptr(addr);
2801 }
2802
2803 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2804 {
2805 RAMBlock *block;
2806 uint8_t *host = ptr;
2807
2808 if (xen_enabled()) {
2809 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2810 return 0;
2811 }
2812
2813 QLIST_FOREACH(block, &ram_list.blocks, next) {
2814 /* This can happen when the block is not mapped. */
2815 if (block->host == NULL) {
2816 continue;
2817 }
2818 if (host - block->host < block->length) {
2819 *ram_addr = block->offset + (host - block->host);
2820 return 0;
2821 }
2822 }
2823
2824 return -1;
2825 }
2826
2827 /* Some of the softmmu routines need to translate from a host pointer
2828 (typically a TLB entry) back to a ram offset. */
2829 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2830 {
2831 ram_addr_t ram_addr;
2832
2833 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2834 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2835 abort();
2836 }
2837 return ram_addr;
2838 }
2839
2840 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2841 unsigned size)
2842 {
2843 #ifdef DEBUG_UNASSIGNED
2844 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2845 #endif
2846 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2847 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2848 #endif
2849 return 0;
2850 }
2851
2852 static void unassigned_mem_write(void *opaque, hwaddr addr,
2853 uint64_t val, unsigned size)
2854 {
2855 #ifdef DEBUG_UNASSIGNED
2856 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2857 #endif
2858 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2859 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2860 #endif
2861 }
2862
2863 static const MemoryRegionOps unassigned_mem_ops = {
2864 .read = unassigned_mem_read,
2865 .write = unassigned_mem_write,
2866 .endianness = DEVICE_NATIVE_ENDIAN,
2867 };
2868
2869 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2870 unsigned size)
2871 {
2872 abort();
2873 }
2874
2875 static void error_mem_write(void *opaque, hwaddr addr,
2876 uint64_t value, unsigned size)
2877 {
2878 abort();
2879 }
2880
2881 static const MemoryRegionOps error_mem_ops = {
2882 .read = error_mem_read,
2883 .write = error_mem_write,
2884 .endianness = DEVICE_NATIVE_ENDIAN,
2885 };
2886
2887 static const MemoryRegionOps rom_mem_ops = {
2888 .read = error_mem_read,
2889 .write = unassigned_mem_write,
2890 .endianness = DEVICE_NATIVE_ENDIAN,
2891 };
2892
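/* Handle a write to a RAM page that is not marked fully dirty (typically
   because it contains translated code): invalidate the affected TBs,
   perform the write, then update the page's dirty flags. */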
2893 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2894 uint64_t val, unsigned size)
2895 {
2896 int dirty_flags;
2897 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2898 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2899 #if !defined(CONFIG_USER_ONLY)
2900 tb_invalidate_phys_page_fast(ram_addr, size);
2901 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2902 #endif
2903 }
2904 switch (size) {
2905 case 1:
2906 stb_p(qemu_get_ram_ptr(ram_addr), val);
2907 break;
2908 case 2:
2909 stw_p(qemu_get_ram_ptr(ram_addr), val);
2910 break;
2911 case 4:
2912 stl_p(qemu_get_ram_ptr(ram_addr), val);
2913 break;
2914 default:
2915 abort();
2916 }
2917 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2918 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2919 /* we remove the notdirty callback only if the code has been
2920 flushed */
2921 if (dirty_flags == 0xff)
2922 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2923 }
2924
2925 static const MemoryRegionOps notdirty_mem_ops = {
2926 .read = error_mem_read,
2927 .write = notdirty_mem_write,
2928 .endianness = DEVICE_NATIVE_ENDIAN,
2929 };
2930
2931 /* Generate a debug exception if a watchpoint has been hit. */
2932 static void check_watchpoint(int offset, int len_mask, int flags)
2933 {
2934 CPUArchState *env = cpu_single_env;
2935 target_ulong pc, cs_base;
2936 TranslationBlock *tb;
2937 target_ulong vaddr;
2938 CPUWatchpoint *wp;
2939 int cpu_flags;
2940
2941 if (env->watchpoint_hit) {
2942 /* We re-entered the check after replacing the TB. Now raise
2943 * the debug interrupt so that it will trigger after the
2944 * current instruction. */
2945 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2946 return;
2947 }
2948 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2949 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2950 if ((vaddr == (wp->vaddr & len_mask) ||
2951 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2952 wp->flags |= BP_WATCHPOINT_HIT;
2953 if (!env->watchpoint_hit) {
2954 env->watchpoint_hit = wp;
2955 tb = tb_find_pc(env->mem_io_pc);
2956 if (!tb) {
2957 cpu_abort(env, "check_watchpoint: could not find TB for "
2958 "pc=%p", (void *)env->mem_io_pc);
2959 }
2960 cpu_restore_state(tb, env, env->mem_io_pc);
2961 tb_phys_invalidate(tb, -1);
2962 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2963 env->exception_index = EXCP_DEBUG;
2964 cpu_loop_exit(env);
2965 } else {
2966 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2967 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2968 cpu_resume_from_signal(env, NULL);
2969 }
2970 }
2971 } else {
2972 wp->flags &= ~BP_WATCHPOINT_HIT;
2973 }
2974 }
2975 }
2976
2977 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2978 so these check for a hit then pass through to the normal out-of-line
2979 phys routines. */
2980 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
2981 unsigned size)
2982 {
2983 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2984 switch (size) {
2985 case 1: return ldub_phys(addr);
2986 case 2: return lduw_phys(addr);
2987 case 4: return ldl_phys(addr);
2988 default: abort();
2989 }
2990 }
2991
2992 static void watch_mem_write(void *opaque, hwaddr addr,
2993 uint64_t val, unsigned size)
2994 {
2995 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2996 switch (size) {
2997 case 1:
2998 stb_phys(addr, val);
2999 break;
3000 case 2:
3001 stw_phys(addr, val);
3002 break;
3003 case 4:
3004 stl_phys(addr, val);
3005 break;
3006 default: abort();
3007 }
3008 }
3009
3010 static const MemoryRegionOps watch_mem_ops = {
3011 .read = watch_mem_read,
3012 .write = watch_mem_write,
3013 .endianness = DEVICE_NATIVE_ENDIAN,
3014 };
3015
3016 static uint64_t subpage_read(void *opaque, hwaddr addr,
3017 unsigned len)
3018 {
3019 subpage_t *mmio = opaque;
3020 unsigned int idx = SUBPAGE_IDX(addr);
3021 MemoryRegionSection *section;
3022 #if defined(DEBUG_SUBPAGE)
3023 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3024 mmio, len, addr, idx);
3025 #endif
3026
3027 section = &phys_sections[mmio->sub_section[idx]];
3028 addr += mmio->base;
3029 addr -= section->offset_within_address_space;
3030 addr += section->offset_within_region;
3031 return io_mem_read(section->mr, addr, len);
3032 }
3033
3034 static void subpage_write(void *opaque, hwaddr addr,
3035 uint64_t value, unsigned len)
3036 {
3037 subpage_t *mmio = opaque;
3038 unsigned int idx = SUBPAGE_IDX(addr);
3039 MemoryRegionSection *section;
3040 #if defined(DEBUG_SUBPAGE)
3041 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3042 " idx %d value %"PRIx64"\n",
3043 __func__, mmio, len, addr, idx, value);
3044 #endif
3045
3046 section = &phys_sections[mmio->sub_section[idx]];
3047 addr += mmio->base;
3048 addr -= section->offset_within_address_space;
3049 addr += section->offset_within_region;
3050 io_mem_write(section->mr, addr, value, len);
3051 }
3052
3053 static const MemoryRegionOps subpage_ops = {
3054 .read = subpage_read,
3055 .write = subpage_write,
3056 .endianness = DEVICE_NATIVE_ENDIAN,
3057 };
3058
3059 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3060 unsigned size)
3061 {
3062 ram_addr_t raddr = addr;
3063 void *ptr = qemu_get_ram_ptr(raddr);
3064 switch (size) {
3065 case 1: return ldub_p(ptr);
3066 case 2: return lduw_p(ptr);
3067 case 4: return ldl_p(ptr);
3068 default: abort();
3069 }
3070 }
3071
3072 static void subpage_ram_write(void *opaque, hwaddr addr,
3073 uint64_t value, unsigned size)
3074 {
3075 ram_addr_t raddr = addr;
3076 void *ptr = qemu_get_ram_ptr(raddr);
3077 switch (size) {
3078 case 1: return stb_p(ptr, value);
3079 case 2: return stw_p(ptr, value);
3080 case 4: return stl_p(ptr, value);
3081 default: abort();
3082 }
3083 }
3084
3085 static const MemoryRegionOps subpage_ram_ops = {
3086 .read = subpage_ram_read,
3087 .write = subpage_ram_write,
3088 .endianness = DEVICE_NATIVE_ENDIAN,
3089 };
3090
3091 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3092 uint16_t section)
3093 {
3094 int idx, eidx;
3095
3096 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3097 return -1;
3098 idx = SUBPAGE_IDX(start);
3099 eidx = SUBPAGE_IDX(end);
3100 #if defined(DEBUG_SUBPAGE)
3101 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", __func__,
3102 mmio, start, end, idx, eidx, section);
3103 #endif
3104 if (memory_region_is_ram(phys_sections[section].mr)) {
3105 MemoryRegionSection new_section = phys_sections[section];
3106 new_section.mr = &io_mem_subpage_ram;
3107 section = phys_section_add(&new_section);
3108 }
3109 for (; idx <= eidx; idx++) {
3110 mmio->sub_section[idx] = section;
3111 }
3112
3113 return 0;
3114 }
3115
3116 static subpage_t *subpage_init(hwaddr base)
3117 {
3118 subpage_t *mmio;
3119
3120 mmio = g_malloc0(sizeof(subpage_t));
3121
3122 mmio->base = base;
3123 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3124 "subpage", TARGET_PAGE_SIZE);
3125 mmio->iomem.subpage = true;
3126 #if defined(DEBUG_SUBPAGE)
3127 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
3128 mmio, base, TARGET_PAGE_SIZE);
3129 #endif
3130 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3131
3132 return mmio;
3133 }
3134
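/* Register a catch-all section covering the whole address space for the
   given region and return its index. */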
3135 static uint16_t dummy_section(MemoryRegion *mr)
3136 {
3137 MemoryRegionSection section = {
3138 .mr = mr,
3139 .offset_within_address_space = 0,
3140 .offset_within_region = 0,
3141 .size = UINT64_MAX,
3142 };
3143
3144 return phys_section_add(&section);
3145 }
3146
3147 MemoryRegion *iotlb_to_region(hwaddr index)
3148 {
3149 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3150 }
3151
3152 static void io_mem_init(void)
3153 {
3154 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3155 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3156 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3157 "unassigned", UINT64_MAX);
3158 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3159 "notdirty", UINT64_MAX);
3160 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3161 "subpage-ram", UINT64_MAX);
3162 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3163 "watch", UINT64_MAX);
3164 }
3165
3166 static void mem_begin(MemoryListener *listener)
3167 {
3168 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3169
3170 destroy_all_mappings(d);
3171 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3172 }
3173
3174 static void core_begin(MemoryListener *listener)
3175 {
3176 phys_sections_clear();
3177 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3178 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3179 phys_section_rom = dummy_section(&io_mem_rom);
3180 phys_section_watch = dummy_section(&io_mem_watch);
3181 }
3182
3183 static void tcg_commit(MemoryListener *listener)
3184 {
3185 CPUArchState *env;
3186
3187 /* since each CPU stores ram addresses in its TLB cache, we must
3188 reset the modified entries */
3189 /* XXX: slow ! */
3190 for (env = first_cpu; env != NULL; env = env->next_cpu) {
3191 tlb_flush(env, 1);
3192 }
3193 }
3194
3195 static void core_log_global_start(MemoryListener *listener)
3196 {
3197 cpu_physical_memory_set_dirty_tracking(1);
3198 }
3199
3200 static void core_log_global_stop(MemoryListener *listener)
3201 {
3202 cpu_physical_memory_set_dirty_tracking(0);
3203 }
3204
3205 static void io_region_add(MemoryListener *listener,
3206 MemoryRegionSection *section)
3207 {
3208 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3209
3210 mrio->mr = section->mr;
3211 mrio->offset = section->offset_within_region;
3212 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3213 section->offset_within_address_space, section->size);
3214 ioport_register(&mrio->iorange);
3215 }
3216
3217 static void io_region_del(MemoryListener *listener,
3218 MemoryRegionSection *section)
3219 {
3220 isa_unassign_ioport(section->offset_within_address_space, section->size);
3221 }
3222
3223 static MemoryListener core_memory_listener = {
3224 .begin = core_begin,
3225 .log_global_start = core_log_global_start,
3226 .log_global_stop = core_log_global_stop,
3227 .priority = 1,
3228 };
3229
3230 static MemoryListener io_memory_listener = {
3231 .region_add = io_region_add,
3232 .region_del = io_region_del,
3233 .priority = 0,
3234 };
3235
3236 static MemoryListener tcg_memory_listener = {
3237 .commit = tcg_commit,
3238 };
3239
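/* Create the dispatch structure for an address space and register its
   listener so that the physical page table is rebuilt whenever the
   memory topology changes. */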
3240 void address_space_init_dispatch(AddressSpace *as)
3241 {
3242 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3243
3244 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3245 d->listener = (MemoryListener) {
3246 .begin = mem_begin,
3247 .region_add = mem_add,
3248 .region_nop = mem_add,
3249 .priority = 0,
3250 };
3251 as->dispatch = d;
3252 memory_listener_register(&d->listener, as);
3253 }
3254
3255 void address_space_destroy_dispatch(AddressSpace *as)
3256 {
3257 AddressSpaceDispatch *d = as->dispatch;
3258
3259 memory_listener_unregister(&d->listener);
3260 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3261 g_free(d);
3262 as->dispatch = NULL;
3263 }
3264
3265 static void memory_map_init(void)
3266 {
3267 system_memory = g_malloc(sizeof(*system_memory));
3268 memory_region_init(system_memory, "system", INT64_MAX);
3269 address_space_init(&address_space_memory, system_memory);
3270 address_space_memory.name = "memory";
3271
3272 system_io = g_malloc(sizeof(*system_io));
3273 memory_region_init(system_io, "io", 65536);
3274 address_space_init(&address_space_io, system_io);
3275 address_space_io.name = "I/O";
3276
3277 memory_listener_register(&core_memory_listener, &address_space_memory);
3278 memory_listener_register(&io_memory_listener, &address_space_io);
3279 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3280 }
3281
3282 MemoryRegion *get_system_memory(void)
3283 {
3284 return system_memory;
3285 }
3286
3287 MemoryRegion *get_system_io(void)
3288 {
3289 return system_io;
3290 }
3291
3292 #endif /* !defined(CONFIG_USER_ONLY) */
3293
3294 /* physical memory access (slow version, mainly for debug) */
3295 #if defined(CONFIG_USER_ONLY)
3296 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3297 uint8_t *buf, int len, int is_write)
3298 {
3299 int l, flags;
3300 target_ulong page;
3301 void * p;
3302
3303 while (len > 0) {
3304 page = addr & TARGET_PAGE_MASK;
3305 l = (page + TARGET_PAGE_SIZE) - addr;
3306 if (l > len)
3307 l = len;
3308 flags = page_get_flags(page);
3309 if (!(flags & PAGE_VALID))
3310 return -1;
3311 if (is_write) {
3312 if (!(flags & PAGE_WRITE))
3313 return -1;
3314 /* XXX: this code should not depend on lock_user */
3315 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3316 return -1;
3317 memcpy(p, buf, l);
3318 unlock_user(p, addr, l);
3319 } else {
3320 if (!(flags & PAGE_READ))
3321 return -1;
3322 /* XXX: this code should not depend on lock_user */
3323 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3324 return -1;
3325 memcpy(buf, p, l);
3326 unlock_user(p, addr, 0);
3327 }
3328 len -= l;
3329 buf += l;
3330 addr += l;
3331 }
3332 return 0;
3333 }
3334
3335 #else
3336
3337 static void invalidate_and_set_dirty(hwaddr addr,
3338 hwaddr length)
3339 {
3340 if (!cpu_physical_memory_is_dirty(addr)) {
3341 /* invalidate code */
3342 tb_invalidate_phys_page_range(addr, addr + length, 0);
3343 /* set dirty bit */
3344 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3345 }
3346 xen_modified_memory(addr, length);
3347 }
3348
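/* Copy data between a guest physical address range and a host buffer,
   one target page at a time.  RAM is accessed through qemu_get_ram_ptr()
   and memcpy; anything else is dispatched to io_mem_read/io_mem_write in
   4, 2 or 1 byte units depending on alignment and remaining length. */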
3349 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3350 int len, bool is_write)
3351 {
3352 AddressSpaceDispatch *d = as->dispatch;
3353 int l;
3354 uint8_t *ptr;
3355 uint32_t val;
3356 hwaddr page;
3357 MemoryRegionSection *section;
3358
3359 while (len > 0) {
3360 page = addr & TARGET_PAGE_MASK;
3361 l = (page + TARGET_PAGE_SIZE) - addr;
3362 if (l > len)
3363 l = len;
3364 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3365
3366 if (is_write) {
3367 if (!memory_region_is_ram(section->mr)) {
3368 hwaddr addr1;
3369 addr1 = memory_region_section_addr(section, addr);
3370 /* XXX: could force cpu_single_env to NULL to avoid
3371 potential bugs */
3372 if (l >= 4 && ((addr1 & 3) == 0)) {
3373 /* 32 bit write access */
3374 val = ldl_p(buf);
3375 io_mem_write(section->mr, addr1, val, 4);
3376 l = 4;
3377 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3378 /* 16 bit write access */
3379 val = lduw_p(buf);
3380 io_mem_write(section->mr, addr1, val, 2);
3381 l = 2;
3382 } else {
3383 /* 8 bit write access */
3384 val = ldub_p(buf);
3385 io_mem_write(section->mr, addr1, val, 1);
3386 l = 1;
3387 }
3388 } else if (!section->readonly) {
3389 ram_addr_t addr1;
3390 addr1 = memory_region_get_ram_addr(section->mr)
3391 + memory_region_section_addr(section, addr);
3392 /* RAM case */
3393 ptr = qemu_get_ram_ptr(addr1);
3394 memcpy(ptr, buf, l);
3395 invalidate_and_set_dirty(addr1, l);
3396 qemu_put_ram_ptr(ptr);
3397 }
3398 } else {
3399 if (!(memory_region_is_ram(section->mr) ||
3400 memory_region_is_romd(section->mr))) {
3401 hwaddr addr1;
3402 /* I/O case */
3403 addr1 = memory_region_section_addr(section, addr);
3404 if (l >= 4 && ((addr1 & 3) == 0)) {
3405 /* 32 bit read access */
3406 val = io_mem_read(section->mr, addr1, 4);
3407 stl_p(buf, val);
3408 l = 4;
3409 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3410 /* 16 bit read access */
3411 val = io_mem_read(section->mr, addr1, 2);
3412 stw_p(buf, val);
3413 l = 2;
3414 } else {
3415 /* 8 bit read access */
3416 val = io_mem_read(section->mr, addr1, 1);
3417 stb_p(buf, val);
3418 l = 1;
3419 }
3420 } else {
3421 /* RAM case */
3422 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3423 + memory_region_section_addr(section,
3424 addr));
3425 memcpy(buf, ptr, l);
3426 qemu_put_ram_ptr(ptr);
3427 }
3428 }
3429 len -= l;
3430 buf += l;
3431 addr += l;
3432 }
3433 }
3434
3435 void address_space_write(AddressSpace *as, hwaddr addr,
3436 const uint8_t *buf, int len)
3437 {
3438 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3439 }
3440
3441 /**
3442 * address_space_read: read from an address space.
3443 *
3444 * @as: #AddressSpace to be accessed
3445 * @addr: address within that address space
3446  * @buf: buffer with the data transferred
 * @len: length in bytes of the data transferred
3447  */
3448 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3449 {
3450 address_space_rw(as, addr, buf, len, false);
3451 }
3452
3453
3454 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3455 int len, int is_write)
3456 {
3457 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
3458 }
3459
3460 /* used for ROM loading: can write in RAM and ROM */
3461 void cpu_physical_memory_write_rom(hwaddr addr,
3462 const uint8_t *buf, int len)
3463 {
3464 AddressSpaceDispatch *d = address_space_memory.dispatch;
3465 int l;
3466 uint8_t *ptr;
3467 hwaddr page;
3468 MemoryRegionSection *section;
3469
3470 while (len > 0) {
3471 page = addr & TARGET_PAGE_MASK;
3472 l = (page + TARGET_PAGE_SIZE) - addr;
3473 if (l > len)
3474 l = len;
3475 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3476
3477 if (!(memory_region_is_ram(section->mr) ||
3478 memory_region_is_romd(section->mr))) {
3479 /* do nothing */
3480 } else {
3481 unsigned long addr1;
3482 addr1 = memory_region_get_ram_addr(section->mr)
3483 + memory_region_section_addr(section, addr);
3484 /* ROM/RAM case */
3485 ptr = qemu_get_ram_ptr(addr1);
3486 memcpy(ptr, buf, l);
3487 invalidate_and_set_dirty(addr1, l);
3488 qemu_put_ram_ptr(ptr);
3489 }
3490 len -= l;
3491 buf += l;
3492 addr += l;
3493 }
3494 }
3495
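/* address_space_map() falls back to a single global bounce buffer when the
   target is not directly accessible RAM; only one such mapping can be
   outstanding at a time. */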
3496 typedef struct {
3497 void *buffer;
3498 hwaddr addr;
3499 hwaddr len;
3500 } BounceBuffer;
3501
3502 static BounceBuffer bounce;
3503
3504 typedef struct MapClient {
3505 void *opaque;
3506 void (*callback)(void *opaque);
3507 QLIST_ENTRY(MapClient) link;
3508 } MapClient;
3509
3510 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3511 = QLIST_HEAD_INITIALIZER(map_client_list);
3512
3513 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3514 {
3515 MapClient *client = g_malloc(sizeof(*client));
3516
3517 client->opaque = opaque;
3518 client->callback = callback;
3519 QLIST_INSERT_HEAD(&map_client_list, client, link);
3520 return client;
3521 }
3522
3523 void cpu_unregister_map_client(void *_client)
3524 {
3525 MapClient *client = (MapClient *)_client;
3526
3527 QLIST_REMOVE(client, link);
3528 g_free(client);
3529 }
3530
3531 static void cpu_notify_map_clients(void)
3532 {
3533 MapClient *client;
3534
3535 while (!QLIST_EMPTY(&map_client_list)) {
3536 client = QLIST_FIRST(&map_client_list);
3537 client->callback(client->opaque);
3538 cpu_unregister_map_client(client);
3539 }
3540 }
3541
3542 /* Map a physical memory region into a host virtual address.
3543 * May map a subset of the requested range, given by and returned in *plen.
3544 * May return NULL if resources needed to perform the mapping are exhausted.
3545 * Use only for reads OR writes - not for read-modify-write operations.
3546 * Use cpu_register_map_client() to know when retrying the map operation is
3547 * likely to succeed.
3548 */
3549 void *address_space_map(AddressSpace *as,
3550 hwaddr addr,
3551 hwaddr *plen,
3552 bool is_write)
3553 {
3554 AddressSpaceDispatch *d = as->dispatch;
3555 hwaddr len = *plen;
3556 hwaddr todo = 0;
3557 int l;
3558 hwaddr page;
3559 MemoryRegionSection *section;
3560 ram_addr_t raddr = RAM_ADDR_MAX;
3561 ram_addr_t rlen;
3562 void *ret;
3563
3564 while (len > 0) {
3565 page = addr & TARGET_PAGE_MASK;
3566 l = (page + TARGET_PAGE_SIZE) - addr;
3567 if (l > len)
3568 l = len;
3569 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3570
3571 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3572 if (todo || bounce.buffer) {
3573 break;
3574 }
3575 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3576 bounce.addr = addr;
3577 bounce.len = l;
3578 if (!is_write) {
3579 address_space_read(as, addr, bounce.buffer, l);
3580 }
3581
3582 *plen = l;
3583 return bounce.buffer;
3584 }
3585 if (!todo) {
3586 raddr = memory_region_get_ram_addr(section->mr)
3587 + memory_region_section_addr(section, addr);
3588 }
3589
3590 len -= l;
3591 addr += l;
3592 todo += l;
3593 }
3594 rlen = todo;
3595 ret = qemu_ram_ptr_length(raddr, &rlen);
3596 *plen = rlen;
3597 return ret;
3598 }
3599
3600 /* Unmaps a memory region previously mapped by address_space_map().
3601 * Will also mark the memory as dirty if is_write == 1. access_len gives
3602 * the amount of memory that was actually read or written by the caller.
3603 */
3604 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3605 int is_write, hwaddr access_len)
3606 {
3607 if (buffer != bounce.buffer) {
3608 if (is_write) {
3609 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3610 while (access_len) {
3611 unsigned l;
3612 l = TARGET_PAGE_SIZE;
3613 if (l > access_len)
3614 l = access_len;
3615 invalidate_and_set_dirty(addr1, l);
3616 addr1 += l;
3617 access_len -= l;
3618 }
3619 }
3620 if (xen_enabled()) {
3621 xen_invalidate_map_cache_entry(buffer);
3622 }
3623 return;
3624 }
3625 if (is_write) {
3626 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3627 }
3628 qemu_vfree(bounce.buffer);
3629 bounce.buffer = NULL;
3630 cpu_notify_map_clients();
3631 }
3632
3633 void *cpu_physical_memory_map(hwaddr addr,
3634 hwaddr *plen,
3635 int is_write)
3636 {
3637 return address_space_map(&address_space_memory, addr, plen, is_write);
3638 }
3639
3640 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3641 int is_write, hwaddr access_len)
3642 {
3643 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3644 }
3645
3646 /* warning: addr must be aligned */
3647 static inline uint32_t ldl_phys_internal(hwaddr addr,
3648 enum device_endian endian)
3649 {
3650 uint8_t *ptr;
3651 uint32_t val;
3652 MemoryRegionSection *section;
3653
3654 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3655
3656 if (!(memory_region_is_ram(section->mr) ||
3657 memory_region_is_romd(section->mr))) {
3658 /* I/O case */
3659 addr = memory_region_section_addr(section, addr);
3660 val = io_mem_read(section->mr, addr, 4);
3661 #if defined(TARGET_WORDS_BIGENDIAN)
3662 if (endian == DEVICE_LITTLE_ENDIAN) {
3663 val = bswap32(val);
3664 }
3665 #else
3666 if (endian == DEVICE_BIG_ENDIAN) {
3667 val = bswap32(val);
3668 }
3669 #endif
3670 } else {
3671 /* RAM case */
3672 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3673 & TARGET_PAGE_MASK)
3674 + memory_region_section_addr(section, addr));
3675 switch (endian) {
3676 case DEVICE_LITTLE_ENDIAN:
3677 val = ldl_le_p(ptr);
3678 break;
3679 case DEVICE_BIG_ENDIAN:
3680 val = ldl_be_p(ptr);
3681 break;
3682 default:
3683 val = ldl_p(ptr);
3684 break;
3685 }
3686 }
3687 return val;
3688 }
3689
3690 uint32_t ldl_phys(hwaddr addr)
3691 {
3692 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3693 }
3694
3695 uint32_t ldl_le_phys(hwaddr addr)
3696 {
3697 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3698 }
3699
3700 uint32_t ldl_be_phys(hwaddr addr)
3701 {
3702 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3703 }
3704
3705 /* warning: addr must be aligned */
3706 static inline uint64_t ldq_phys_internal(hwaddr addr,
3707 enum device_endian endian)
3708 {
3709 uint8_t *ptr;
3710 uint64_t val;
3711 MemoryRegionSection *section;
3712
3713 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3714
3715 if (!(memory_region_is_ram(section->mr) ||
3716 memory_region_is_romd(section->mr))) {
3717 /* I/O case */
3718 addr = memory_region_section_addr(section, addr);
3719
3720 /* XXX This is broken when device endian != cpu endian.
3721 Fix and add "endian" variable check */
3722 #ifdef TARGET_WORDS_BIGENDIAN
3723 val = io_mem_read(section->mr, addr, 4) << 32;
3724 val |= io_mem_read(section->mr, addr + 4, 4);
3725 #else
3726 val = io_mem_read(section->mr, addr, 4);
3727 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3728 #endif
3729 } else {
3730 /* RAM case */
3731 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3732 & TARGET_PAGE_MASK)
3733 + memory_region_section_addr(section, addr));
3734 switch (endian) {
3735 case DEVICE_LITTLE_ENDIAN:
3736 val = ldq_le_p(ptr);
3737 break;
3738 case DEVICE_BIG_ENDIAN:
3739 val = ldq_be_p(ptr);
3740 break;
3741 default:
3742 val = ldq_p(ptr);
3743 break;
3744 }
3745 }
3746 return val;
3747 }
3748
3749 uint64_t ldq_phys(hwaddr addr)
3750 {
3751 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3752 }
3753
3754 uint64_t ldq_le_phys(hwaddr addr)
3755 {
3756 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3757 }
3758
3759 uint64_t ldq_be_phys(hwaddr addr)
3760 {
3761 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3762 }
3763
3764 /* XXX: optimize */
3765 uint32_t ldub_phys(hwaddr addr)
3766 {
3767 uint8_t val;
3768 cpu_physical_memory_read(addr, &val, 1);
3769 return val;
3770 }
3771
3772 /* warning: addr must be aligned */
3773 static inline uint32_t lduw_phys_internal(hwaddr addr,
3774 enum device_endian endian)
3775 {
3776 uint8_t *ptr;
3777 uint64_t val;
3778 MemoryRegionSection *section;
3779
3780 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3781
3782 if (!(memory_region_is_ram(section->mr) ||
3783 memory_region_is_romd(section->mr))) {
3784 /* I/O case */
3785 addr = memory_region_section_addr(section, addr);
3786 val = io_mem_read(section->mr, addr, 2);
3787 #if defined(TARGET_WORDS_BIGENDIAN)
3788 if (endian == DEVICE_LITTLE_ENDIAN) {
3789 val = bswap16(val);
3790 }
3791 #else
3792 if (endian == DEVICE_BIG_ENDIAN) {
3793 val = bswap16(val);
3794 }
3795 #endif
3796 } else {
3797 /* RAM case */
3798 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3799 & TARGET_PAGE_MASK)
3800 + memory_region_section_addr(section, addr));
3801 switch (endian) {
3802 case DEVICE_LITTLE_ENDIAN:
3803 val = lduw_le_p(ptr);
3804 break;
3805 case DEVICE_BIG_ENDIAN:
3806 val = lduw_be_p(ptr);
3807 break;
3808 default:
3809 val = lduw_p(ptr);
3810 break;
3811 }
3812 }
3813 return val;
3814 }
3815
3816 uint32_t lduw_phys(hwaddr addr)
3817 {
3818 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3819 }
3820
3821 uint32_t lduw_le_phys(hwaddr addr)
3822 {
3823 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3824 }
3825
3826 uint32_t lduw_be_phys(hwaddr addr)
3827 {
3828 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3829 }
3830
3831 /* warning: addr must be aligned. The ram page is not marked as dirty
3832 and the code inside is not invalidated. It is useful if the dirty
3833 bits are used to track modified PTEs */
3834 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3835 {
3836 uint8_t *ptr;
3837 MemoryRegionSection *section;
3838
3839 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3840
3841 if (!memory_region_is_ram(section->mr) || section->readonly) {
3842 addr = memory_region_section_addr(section, addr);
3843 if (memory_region_is_ram(section->mr)) {
3844 section = &phys_sections[phys_section_rom];
3845 }
3846 io_mem_write(section->mr, addr, val, 4);
3847 } else {
3848 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3849 & TARGET_PAGE_MASK)
3850 + memory_region_section_addr(section, addr);
3851 ptr = qemu_get_ram_ptr(addr1);
3852 stl_p(ptr, val);
3853
3854 if (unlikely(in_migration)) {
3855 if (!cpu_physical_memory_is_dirty(addr1)) {
3856 /* invalidate code */
3857 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3858 /* set dirty bit */
3859 cpu_physical_memory_set_dirty_flags(
3860 addr1, (0xff & ~CODE_DIRTY_FLAG));
3861 }
3862 }
3863 }
3864 }
3865
3866 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3867 {
3868 uint8_t *ptr;
3869 MemoryRegionSection *section;
3870
3871 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3872
3873 if (!memory_region_is_ram(section->mr) || section->readonly) {
3874 addr = memory_region_section_addr(section, addr);
3875 if (memory_region_is_ram(section->mr)) {
3876 section = &phys_sections[phys_section_rom];
3877 }
3878 #ifdef TARGET_WORDS_BIGENDIAN
3879 io_mem_write(section->mr, addr, val >> 32, 4);
3880 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3881 #else
3882 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3883 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3884 #endif
3885 } else {
3886 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3887 & TARGET_PAGE_MASK)
3888 + memory_region_section_addr(section, addr));
3889 stq_p(ptr, val);
3890 }
3891 }
3892
3893 /* warning: addr must be aligned */
3894 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3895 enum device_endian endian)
3896 {
3897 uint8_t *ptr;
3898 MemoryRegionSection *section;
3899
3900 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3901
3902 if (!memory_region_is_ram(section->mr) || section->readonly) {
3903 addr = memory_region_section_addr(section, addr);
3904 if (memory_region_is_ram(section->mr)) {
3905 section = &phys_sections[phys_section_rom];
3906 }
3907 #if defined(TARGET_WORDS_BIGENDIAN)
3908 if (endian == DEVICE_LITTLE_ENDIAN) {
3909 val = bswap32(val);
3910 }
3911 #else
3912 if (endian == DEVICE_BIG_ENDIAN) {
3913 val = bswap32(val);
3914 }
3915 #endif
3916 io_mem_write(section->mr, addr, val, 4);
3917 } else {
3918 unsigned long addr1;
3919 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3920 + memory_region_section_addr(section, addr);
3921 /* RAM case */
3922 ptr = qemu_get_ram_ptr(addr1);
3923 switch (endian) {
3924 case DEVICE_LITTLE_ENDIAN:
3925 stl_le_p(ptr, val);
3926 break;
3927 case DEVICE_BIG_ENDIAN:
3928 stl_be_p(ptr, val);
3929 break;
3930 default:
3931 stl_p(ptr, val);
3932 break;
3933 }
3934 invalidate_and_set_dirty(addr1, 4);
3935 }
3936 }
3937
3938 void stl_phys(hwaddr addr, uint32_t val)
3939 {
3940 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3941 }
3942
3943 void stl_le_phys(hwaddr addr, uint32_t val)
3944 {
3945 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3946 }
3947
3948 void stl_be_phys(hwaddr addr, uint32_t val)
3949 {
3950 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3951 }
3952
3953 /* XXX: optimize */
3954 void stb_phys(hwaddr addr, uint32_t val)
3955 {
3956 uint8_t v = val;
3957 cpu_physical_memory_write(addr, &v, 1);
3958 }
3959
3960 /* warning: addr must be aligned */
3961 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3962 enum device_endian endian)
3963 {
3964 uint8_t *ptr;
3965 MemoryRegionSection *section;
3966
3967 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3968
3969 if (!memory_region_is_ram(section->mr) || section->readonly) {
3970 addr = memory_region_section_addr(section, addr);
3971 if (memory_region_is_ram(section->mr)) {
3972 section = &phys_sections[phys_section_rom];
3973 }
3974 #if defined(TARGET_WORDS_BIGENDIAN)
3975 if (endian == DEVICE_LITTLE_ENDIAN) {
3976 val = bswap16(val);
3977 }
3978 #else
3979 if (endian == DEVICE_BIG_ENDIAN) {
3980 val = bswap16(val);
3981 }
3982 #endif
3983 io_mem_write(section->mr, addr, val, 2);
3984 } else {
3985 unsigned long addr1;
3986 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3987 + memory_region_section_addr(section, addr);
3988 /* RAM case */
3989 ptr = qemu_get_ram_ptr(addr1);
3990 switch (endian) {
3991 case DEVICE_LITTLE_ENDIAN:
3992 stw_le_p(ptr, val);
3993 break;
3994 case DEVICE_BIG_ENDIAN:
3995 stw_be_p(ptr, val);
3996 break;
3997 default:
3998 stw_p(ptr, val);
3999 break;
4000 }
4001 invalidate_and_set_dirty(addr1, 2);
4002 }
4003 }
4004
4005 void stw_phys(hwaddr addr, uint32_t val)
4006 {
4007 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4008 }
4009
4010 void stw_le_phys(hwaddr addr, uint32_t val)
4011 {
4012 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4013 }
4014
4015 void stw_be_phys(hwaddr addr, uint32_t val)
4016 {
4017 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4018 }
4019
4020 /* XXX: optimize */
4021 void stq_phys(hwaddr addr, uint64_t val)
4022 {
4023 val = tswap64(val);
4024 cpu_physical_memory_write(addr, &val, 8);
4025 }
4026
4027 void stq_le_phys(hwaddr addr, uint64_t val)
4028 {
4029 val = cpu_to_le64(val);
4030 cpu_physical_memory_write(addr, &val, 8);
4031 }
4032
4033 void stq_be_phys(hwaddr addr, uint64_t val)
4034 {
4035 val = cpu_to_be64(val);
4036 cpu_physical_memory_write(addr, &val, 8);
4037 }
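
/*
 * Illustrative sketch (hypothetical address and value): the 64-bit helpers
 * above convert the value to the requested byte order on the host and then
 * push all 8 bytes through cpu_physical_memory_write(), so they share the
 * MMIO dispatch and dirty tracking of the generic write path.
 *
 *     stq_le_phys(0x80000000, 0x1234000ULL);
 *     // guest memory at 0x80000000 now holds 00 40 23 01 00 00 00 00
 *
 * stq_phys() uses tswap64(), i.e. the target's native byte order.
 */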
4038
4039 /* virtual memory access for debug (includes writing to ROM) */
4040 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4041 uint8_t *buf, int len, int is_write)
4042 {
4043 int l;
4044 hwaddr phys_addr;
4045 target_ulong page;
4046
4047 while (len > 0) {
4048 page = addr & TARGET_PAGE_MASK;
4049 phys_addr = cpu_get_phys_page_debug(env, page);
4050 /* if no physical page mapped, return an error */
4051 if (phys_addr == -1)
4052 return -1;
4053 l = (page + TARGET_PAGE_SIZE) - addr;
4054 if (l > len)
4055 l = len;
4056 phys_addr += (addr & ~TARGET_PAGE_MASK);
4057 if (is_write)
4058 cpu_physical_memory_write_rom(phys_addr, buf, l);
4059 else
4060 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4061 len -= l;
4062 buf += l;
4063 addr += l;
4064 }
4065 return 0;
4066 }
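
/*
 * Illustrative sketch of a typical caller (buffer and address are made up):
 * a debugger front end such as the gdbstub reads guest *virtual* memory with
 * cpu_memory_rw_debug(), which translates one page at a time via
 * cpu_get_phys_page_debug().
 *
 *     uint8_t buf[16];
 *     if (cpu_memory_rw_debug(env, guest_va, buf, sizeof(buf), 0) < 0) {
 *         // no physical page mapped at guest_va
 *     }
 *
 * Writes go through cpu_physical_memory_write_rom(), so a debugger can plant
 * software breakpoints even in ROM-backed pages.
 */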
4067 #endif
4068
4069 /* In deterministic (icount) execution mode, an instruction that performs
4070    device I/O must be the last instruction in its TB. */
4071 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4072 {
4073 TranslationBlock *tb;
4074 uint32_t n, cflags;
4075 target_ulong pc, cs_base;
4076 uint64_t flags;
4077
4078 tb = tb_find_pc(retaddr);
4079 if (!tb) {
4080 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4081 (void *)retaddr);
4082 }
4083 n = env->icount_decr.u16.low + tb->icount;
4084 cpu_restore_state(tb, env, retaddr);
4085 /* Calculate how many instructions had been executed in this TB before
4086    the I/O access occurred. */
4087 n = n - env->icount_decr.u16.low;
4088 /* Generate a new TB ending on the I/O insn. */
4089 n++;
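/* At this point n is the number of instructions the regenerated TB must
   contain: those that completed in this TB before the I/O access, plus the
   I/O instruction itself.  For example, if three instructions completed
   first, n == 4 and the TB regenerated below (cflags = n | CF_LAST_IO) ends
   exactly on the I/O instruction. */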
4090 /* On MIPS and SH, delay slot instructions can only be restarted if
4091 they were already the first instruction in the TB. If this is not
4092 the first instruction in a TB then re-execute the preceding
4093 branch. */
4094 #if defined(TARGET_MIPS)
4095 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4096 env->active_tc.PC -= 4;
4097 env->icount_decr.u16.low++;
4098 env->hflags &= ~MIPS_HFLAG_BMASK;
4099 }
4100 #elif defined(TARGET_SH4)
4101 if ((env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) != 0
4102 && n > 1) {
4103 env->pc -= 2;
4104 env->icount_decr.u16.low++;
4105 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4106 }
4107 #endif
4108 /* This should never happen: n cannot exceed the TB's instruction count, which is at most CF_COUNT_MASK. */
4109 if (n > CF_COUNT_MASK)
4110 cpu_abort(env, "TB too big during recompile");
4111
4112 cflags = n | CF_LAST_IO;
4113 pc = tb->pc;
4114 cs_base = tb->cs_base;
4115 flags = tb->flags;
4116 tb_phys_invalidate(tb, -1);
4117 /* FIXME: In theory this could raise an exception. In practice
4118 we have already translated the block once so it's probably ok. */
4119 tb_gen_code(env, pc, cs_base, flags, cflags);
4120 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4121 the first in the TB) then we end up generating a whole new TB and
4122 repeating the fault, which is horribly inefficient.
4123 Better would be to execute just this insn uncached, or generate a
4124 second new TB. */
4125 cpu_resume_from_signal(env, NULL);
4126 }
4127
4128 #if !defined(CONFIG_USER_ONLY)
4129
4130 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4131 {
4132 int i, target_code_size, max_target_code_size;
4133 int direct_jmp_count, direct_jmp2_count, cross_page;
4134 TranslationBlock *tb;
4135
4136 target_code_size = 0;
4137 max_target_code_size = 0;
4138 cross_page = 0;
4139 direct_jmp_count = 0;
4140 direct_jmp2_count = 0;
4141 for (i = 0; i < nb_tbs; i++) {
4142 tb = &tbs[i];
4143 target_code_size += tb->size;
4144 if (tb->size > max_target_code_size)
4145 max_target_code_size = tb->size;
4146 if (tb->page_addr[1] != -1)
4147 cross_page++;
4148 if (tb->tb_next_offset[0] != 0xffff) {
4149 direct_jmp_count++;
4150 if (tb->tb_next_offset[1] != 0xffff) {
4151 direct_jmp2_count++;
4152 }
4153 }
4154 }
4155 /* XXX: avoid using doubles? */
4156 cpu_fprintf(f, "Translation buffer state:\n");
4157 cpu_fprintf(f, "gen code size %td/%zd\n",
4158 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4159 cpu_fprintf(f, "TB count %d/%d\n",
4160 nb_tbs, code_gen_max_blocks);
4161 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4162 nb_tbs ? target_code_size / nb_tbs : 0,
4163 max_target_code_size);
4164 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4165 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4166 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4167 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4168 cross_page,
4169 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4170 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4171 direct_jmp_count,
4172 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4173 direct_jmp2_count,
4174 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4175 cpu_fprintf(f, "\nStatistics:\n");
4176 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4177 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4178 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4179 tcg_dump_info(f, cpu_fprintf);
4180 }
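
/*
 * As far as I can tell, dump_exec_info() is what backs the "info jit" HMP
 * monitor command in this tree; outside the monitor it can be pointed at any
 * stdio stream, since plain fprintf matches the fprintf_function signature:
 *
 *     dump_exec_info(stderr, fprintf);
 */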
4181
4182 /*
4183 * A helper function for the _utterly broken_ virtio device model to find out if
4184 * it's running on a big-endian machine. Don't do this at home, kids!
4185 */
4186 bool virtio_is_big_endian(void);
4187 bool virtio_is_big_endian(void)
4188 {
4189 #if defined(TARGET_WORDS_BIGENDIAN)
4190 return true;
4191 #else
4192 return false;
4193 #endif
4194 }
4195
4196 #endif
4197
4198 #ifndef CONFIG_USER_ONLY
4199 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4200 {
4201 MemoryRegionSection *section;
4202
4203 section = phys_page_find(address_space_memory.dispatch,
4204 phys_addr >> TARGET_PAGE_BITS);
4205
4206 return !(memory_region_is_ram(section->mr) ||
4207 memory_region_is_romd(section->mr));
4208 }
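
/*
 * Illustrative sketch (the address is hypothetical): callers use this
 * predicate to distinguish MMIO-backed guest-physical addresses from
 * RAM/ROMD-backed ones before deciding how to access them.
 *
 *     if (cpu_physical_memory_is_io(0xfee00000)) {
 *         // treat as device I/O rather than plain memory
 *     }
 */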
4209 #endif