exec.c
1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
59
60 #include "cputlb.h"
61
62 #include "memory-internal.h"
63
64 //#define DEBUG_TB_INVALIDATE
65 //#define DEBUG_FLUSH
66 //#define DEBUG_UNASSIGNED
67
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
73
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
78
79 #define SMC_BITMAP_USE_THRESHOLD 10
80
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the TBs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
87
88 uint8_t *code_gen_prologue;
89 static uint8_t *code_gen_buffer;
90 static size_t code_gen_buffer_size;
91 /* threshold to flush the translated code buffer */
92 static size_t code_gen_buffer_max_size;
93 static uint8_t *code_gen_ptr;
94
95 #if !defined(CONFIG_USER_ONLY)
96 int phys_ram_fd;
97 static int in_migration;
98
99 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
100
101 static MemoryRegion *system_memory;
102 static MemoryRegion *system_io;
103
104 AddressSpace address_space_io;
105 AddressSpace address_space_memory;
106
107 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
108 static MemoryRegion io_mem_subpage_ram;
109
110 #endif
111
112 CPUArchState *first_cpu;
113 /* current CPU in the current thread. It is only valid inside
114 cpu_exec() */
115 DEFINE_TLS(CPUArchState *,cpu_single_env);
116 /* 0 = Do not count executed instructions.
117 1 = Precise instruction counting.
118 2 = Adaptive rate instruction counting. */
119 int use_icount = 0;
120
121 typedef struct PageDesc {
122 /* list of TBs intersecting this ram page */
123 TranslationBlock *first_tb;
124 /* to optimize self-modifying code handling, we count the number of code
125 write accesses to a given page and switch to a bitmap past a threshold */
126 unsigned int code_write_count;
127 uint8_t *code_bitmap;
128 #if defined(CONFIG_USER_ONLY)
129 unsigned long flags;
130 #endif
131 } PageDesc;
132
133 /* In system mode we want L1_MAP to be based on ram offsets,
134 while in user mode we want it to be based on virtual addresses. */
135 #if !defined(CONFIG_USER_ONLY)
136 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
137 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
138 #else
139 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
140 #endif
141 #else
142 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
143 #endif
144
145 /* Size of the L2 (and L3, etc) page tables. */
146 #define L2_BITS 10
147 #define L2_SIZE (1 << L2_BITS)
148
149 #define P_L2_LEVELS \
150 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
151
152 /* The bits remaining after N lower levels of page tables. */
153 #define V_L1_BITS_REM \
154 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
155
156 #if V_L1_BITS_REM < 4
157 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
158 #else
159 #define V_L1_BITS V_L1_BITS_REM
160 #endif
161
162 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
163
164 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
165
166 uintptr_t qemu_real_host_page_size;
167 uintptr_t qemu_host_page_size;
168 uintptr_t qemu_host_page_mask;
169
170 /* This is a multi-level map on the virtual address space.
171 The bottom level has pointers to PageDesc. */
172 static void *l1_map[V_L1_SIZE];
173
174 #if !defined(CONFIG_USER_ONLY)
175
176 static MemoryRegionSection *phys_sections;
177 static unsigned phys_sections_nb, phys_sections_nb_alloc;
178 static uint16_t phys_section_unassigned;
179 static uint16_t phys_section_notdirty;
180 static uint16_t phys_section_rom;
181 static uint16_t phys_section_watch;
182
183 /* Simple allocator for PhysPageEntry nodes */
184 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
185 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
186
187 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
188
189 static void io_mem_init(void);
190 static void memory_map_init(void);
191
192 static MemoryRegion io_mem_watch;
193 #endif
194
195 /* statistics */
196 static int tb_flush_count;
197 static int tb_phys_invalidate_count;
198
199 #ifdef _WIN32
200 static inline void map_exec(void *addr, long size)
201 {
202 DWORD old_protect;
203 VirtualProtect(addr, size,
204 PAGE_EXECUTE_READWRITE, &old_protect);
205
206 }
207 #else
208 static inline void map_exec(void *addr, long size)
209 {
210 unsigned long start, end, page_size;
211
212 page_size = getpagesize();
213 start = (unsigned long)addr;
214 start &= ~(page_size - 1);
215
216 end = (unsigned long)addr + size;
217 end += page_size - 1;
218 end &= ~(page_size - 1);
219
220 mprotect((void *)start, end - start,
221 PROT_READ | PROT_WRITE | PROT_EXEC);
222 }
223 #endif
224
225 static void page_init(void)
226 {
227 /* NOTE: we can always suppose that qemu_host_page_size >=
228 TARGET_PAGE_SIZE */
229 #ifdef _WIN32
230 {
231 SYSTEM_INFO system_info;
232
233 GetSystemInfo(&system_info);
234 qemu_real_host_page_size = system_info.dwPageSize;
235 }
236 #else
237 qemu_real_host_page_size = getpagesize();
238 #endif
239 if (qemu_host_page_size == 0)
240 qemu_host_page_size = qemu_real_host_page_size;
241 if (qemu_host_page_size < TARGET_PAGE_SIZE)
242 qemu_host_page_size = TARGET_PAGE_SIZE;
243 qemu_host_page_mask = ~(qemu_host_page_size - 1);
244
245 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
246 {
247 #ifdef HAVE_KINFO_GETVMMAP
248 struct kinfo_vmentry *freep;
249 int i, cnt;
250
251 freep = kinfo_getvmmap(getpid(), &cnt);
252 if (freep) {
253 mmap_lock();
254 for (i = 0; i < cnt; i++) {
255 unsigned long startaddr, endaddr;
256
257 startaddr = freep[i].kve_start;
258 endaddr = freep[i].kve_end;
259 if (h2g_valid(startaddr)) {
260 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
261
262 if (h2g_valid(endaddr)) {
263 endaddr = h2g(endaddr);
264 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
265 } else {
266 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
267 endaddr = ~0ul;
268 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
269 #endif
270 }
271 }
272 }
273 free(freep);
274 mmap_unlock();
275 }
276 #else
277 FILE *f;
278
279 last_brk = (unsigned long)sbrk(0);
280
281 f = fopen("/compat/linux/proc/self/maps", "r");
282 if (f) {
283 mmap_lock();
284
285 do {
286 unsigned long startaddr, endaddr;
287 int n;
288
289 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
290
291 if (n == 2 && h2g_valid(startaddr)) {
292 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
293
294 if (h2g_valid(endaddr)) {
295 endaddr = h2g(endaddr);
296 } else {
297 endaddr = ~0ul;
298 }
299 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
300 }
301 } while (!feof(f));
302
303 fclose(f);
304 mmap_unlock();
305 }
306 #endif
307 }
308 #endif
309 }
310
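/* Walk the multi-level l1_map radix tree for a given page index.  The
   top level has V_L1_SIZE entries; each further level covers L2_BITS
   bits of the index.  If 'alloc' is set, missing intermediate tables
   and the final PageDesc array are allocated on demand; otherwise NULL
   is returned as soon as a hole is found. */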
311 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
312 {
313 PageDesc *pd;
314 void **lp;
315 int i;
316
317 #if defined(CONFIG_USER_ONLY)
318 /* We can't use g_malloc because it may recurse into a locked mutex. */
319 # define ALLOC(P, SIZE) \
320 do { \
321 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
322 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
323 } while (0)
324 #else
325 # define ALLOC(P, SIZE) \
326 do { P = g_malloc0(SIZE); } while (0)
327 #endif
328
329 /* Level 1. Always allocated. */
330 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
331
332 /* Level 2..N-1. */
333 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
334 void **p = *lp;
335
336 if (p == NULL) {
337 if (!alloc) {
338 return NULL;
339 }
340 ALLOC(p, sizeof(void *) * L2_SIZE);
341 *lp = p;
342 }
343
344 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
345 }
346
347 pd = *lp;
348 if (pd == NULL) {
349 if (!alloc) {
350 return NULL;
351 }
352 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
353 *lp = pd;
354 }
355
356 #undef ALLOC
357
358 return pd + (index & (L2_SIZE - 1));
359 }
360
361 static inline PageDesc *page_find(tb_page_addr_t index)
362 {
363 return page_find_alloc(index, 0);
364 }
365
366 #if !defined(CONFIG_USER_ONLY)
367
368 static void phys_map_node_reserve(unsigned nodes)
369 {
370 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
371 typedef PhysPageEntry Node[L2_SIZE];
372 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
373 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
374 phys_map_nodes_nb + nodes);
375 phys_map_nodes = g_renew(Node, phys_map_nodes,
376 phys_map_nodes_nb_alloc);
377 }
378 }
379
380 static uint16_t phys_map_node_alloc(void)
381 {
382 unsigned i;
383 uint16_t ret;
384
385 ret = phys_map_nodes_nb++;
386 assert(ret != PHYS_MAP_NODE_NIL);
387 assert(ret != phys_map_nodes_nb_alloc);
388 for (i = 0; i < L2_SIZE; ++i) {
389 phys_map_nodes[ret][i].is_leaf = 0;
390 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
391 }
392 return ret;
393 }
394
395 static void phys_map_nodes_reset(void)
396 {
397 phys_map_nodes_nb = 0;
398 }
399
400
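/* Recursively fill the physical page map so that 'nb' pages starting at
   'index' point at section 'leaf'.  'step' is the number of pages covered
   by one entry at this level; a range aligned to and at least as large as
   'step' is stored as a leaf here, anything smaller recurses one level
   down.  Freshly allocated bottom-level nodes start out pointing at
   phys_section_unassigned. */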
401 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
402 hwaddr *nb, uint16_t leaf,
403 int level)
404 {
405 PhysPageEntry *p;
406 int i;
407 hwaddr step = (hwaddr)1 << (level * L2_BITS);
408
409 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
410 lp->ptr = phys_map_node_alloc();
411 p = phys_map_nodes[lp->ptr];
412 if (level == 0) {
413 for (i = 0; i < L2_SIZE; i++) {
414 p[i].is_leaf = 1;
415 p[i].ptr = phys_section_unassigned;
416 }
417 }
418 } else {
419 p = phys_map_nodes[lp->ptr];
420 }
421 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
422
423 while (*nb && lp < &p[L2_SIZE]) {
424 if ((*index & (step - 1)) == 0 && *nb >= step) {
425 lp->is_leaf = true;
426 lp->ptr = leaf;
427 *index += step;
428 *nb -= step;
429 } else {
430 phys_page_set_level(lp, index, nb, leaf, level - 1);
431 }
432 ++lp;
433 }
434 }
435
436 static void phys_page_set(AddressSpaceDispatch *d,
437 hwaddr index, hwaddr nb,
438 uint16_t leaf)
439 {
440 /* Wildly overreserve - it doesn't matter much. */
441 phys_map_node_reserve(3 * P_L2_LEVELS);
442
443 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
444 }
445
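/* Look up the MemoryRegionSection covering physical page 'index'.  The
   phys_map is walked from the top level; hitting PHYS_MAP_NODE_NIL on the
   way down means the page is unmapped and the unassigned section is
   returned instead. */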
446 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
447 {
448 PhysPageEntry lp = d->phys_map;
449 PhysPageEntry *p;
450 int i;
451 uint16_t s_index = phys_section_unassigned;
452
453 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
454 if (lp.ptr == PHYS_MAP_NODE_NIL) {
455 goto not_found;
456 }
457 p = phys_map_nodes[lp.ptr];
458 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
459 }
460
461 s_index = lp.ptr;
462 not_found:
463 return &phys_sections[s_index];
464 }
465
466 bool memory_region_is_unassigned(MemoryRegion *mr)
467 {
468 return mr != &io_mem_ram && mr != &io_mem_rom
469 && mr != &io_mem_notdirty && !mr->rom_device
470 && mr != &io_mem_watch;
471 }
472
473 #define mmap_lock() do { } while(0)
474 #define mmap_unlock() do { } while(0)
475 #endif
476
477 #if defined(CONFIG_USER_ONLY)
478 /* Currently it is not recommended to allocate big chunks of data in
479 user mode. This will change when a dedicated libc is used. */
480 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
481 region in which the guest needs to run. Revisit this. */
482 #define USE_STATIC_CODE_GEN_BUFFER
483 #endif
484
485 /* ??? Should configure for this, not list operating systems here. */
486 #if (defined(__linux__) \
487 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
488 || defined(__DragonFly__) || defined(__OpenBSD__) \
489 || defined(__NetBSD__))
490 # define USE_MMAP
491 #endif
492
493 /* Minimum size of the code gen buffer. This number is arbitrarily chosen,
494 but not so small that we can't have a fair number of TBs live. */
495 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
496
497 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
498 indicated, this is constrained by the range of direct branches on the
499 host cpu, as used by the TCG implementation of goto_tb. */
500 #if defined(__x86_64__)
501 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
502 #elif defined(__sparc__)
503 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
504 #elif defined(__arm__)
505 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
506 #elif defined(__s390x__)
507 /* We have a +- 4GB range on the branches; leave some slop. */
508 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
509 #else
510 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
511 #endif
512
513 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
514
515 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
516 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
517 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
518
519 static inline size_t size_code_gen_buffer(size_t tb_size)
520 {
521 /* Size the buffer. */
522 if (tb_size == 0) {
523 #ifdef USE_STATIC_CODE_GEN_BUFFER
524 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
525 #else
526 /* ??? Needs adjustments. */
527 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
528 static buffer, we could size this on RESERVED_VA, on the text
529 segment size of the executable, or continue to use the default. */
530 tb_size = (unsigned long)(ram_size / 4);
531 #endif
532 }
533 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
534 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
535 }
536 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
537 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
538 }
539 code_gen_buffer_size = tb_size;
540 return tb_size;
541 }
542
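/* Three ways of obtaining the code generation buffer: a static BSS array
   (CONFIG_USER_ONLY), an anonymous executable mmap on the hosts covered by
   USE_MMAP above, or a plain g_malloc that is then made executable via
   map_exec(). */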
543 #ifdef USE_STATIC_CODE_GEN_BUFFER
544 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
545 __attribute__((aligned(CODE_GEN_ALIGN)));
546
547 static inline void *alloc_code_gen_buffer(void)
548 {
549 map_exec(static_code_gen_buffer, code_gen_buffer_size);
550 return static_code_gen_buffer;
551 }
552 #elif defined(USE_MMAP)
553 static inline void *alloc_code_gen_buffer(void)
554 {
555 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
556 uintptr_t start = 0;
557 void *buf;
558
559 /* Constrain the position of the buffer based on the host cpu.
560 Note that these addresses are chosen in concert with the
561 addresses assigned in the relevant linker script file. */
562 # if defined(__PIE__) || defined(__PIC__)
563 /* Don't bother setting a preferred location if we're building
564 a position-independent executable. We're more likely to get
565 an address near the main executable if we let the kernel
566 choose the address. */
567 # elif defined(__x86_64__) && defined(MAP_32BIT)
568 /* Force the memory down into low memory with the executable.
569 Leave the choice of exact location with the kernel. */
570 flags |= MAP_32BIT;
571 /* Cannot expect to map more than 800MB in low memory. */
572 if (code_gen_buffer_size > 800u * 1024 * 1024) {
573 code_gen_buffer_size = 800u * 1024 * 1024;
574 }
575 # elif defined(__sparc__)
576 start = 0x40000000ul;
577 # elif defined(__s390x__)
578 start = 0x90000000ul;
579 # endif
580
581 buf = mmap((void *)start, code_gen_buffer_size,
582 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
583 return buf == MAP_FAILED ? NULL : buf;
584 }
585 #else
586 static inline void *alloc_code_gen_buffer(void)
587 {
588 void *buf = g_malloc(code_gen_buffer_size);
589 if (buf) {
590 map_exec(buf, code_gen_buffer_size);
591 }
592 return buf;
593 }
594 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
595
596 static inline void code_gen_alloc(size_t tb_size)
597 {
598 code_gen_buffer_size = size_code_gen_buffer(tb_size);
599 code_gen_buffer = alloc_code_gen_buffer();
600 if (code_gen_buffer == NULL) {
601 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
602 exit(1);
603 }
604
605 /* Steal room for the prologue at the end of the buffer. This ensures
606 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
607 from TB's to the prologue are going to be in range. It also means
608 that we don't need to mark (additional) portions of the data segment
609 as executable. */
610 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
611 code_gen_buffer_size -= 1024;
612
613 code_gen_buffer_max_size = code_gen_buffer_size -
614 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
615 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
616 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
617 }
618
619 /* Must be called before using the QEMU cpus. 'tb_size' is the size
620 (in bytes) allocated to the translation buffer. Zero means default
621 size. */
622 void tcg_exec_init(unsigned long tb_size)
623 {
624 cpu_gen_init();
625 code_gen_alloc(tb_size);
626 code_gen_ptr = code_gen_buffer;
627 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
628 page_init();
629 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
630 /* There's no guest base to take into account, so go ahead and
631 initialize the prologue now. */
632 tcg_prologue_init(&tcg_ctx);
633 #endif
634 }
635
636 bool tcg_enabled(void)
637 {
638 return code_gen_buffer != NULL;
639 }
640
641 void cpu_exec_init_all(void)
642 {
643 #if !defined(CONFIG_USER_ONLY)
644 memory_map_init();
645 io_mem_init();
646 #endif
647 }
648
649 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
650
651 static int cpu_common_post_load(void *opaque, int version_id)
652 {
653 CPUArchState *env = opaque;
654
655 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
656 version_id is increased. */
657 env->interrupt_request &= ~0x01;
658 tlb_flush(env, 1);
659
660 return 0;
661 }
662
663 static const VMStateDescription vmstate_cpu_common = {
664 .name = "cpu_common",
665 .version_id = 1,
666 .minimum_version_id = 1,
667 .minimum_version_id_old = 1,
668 .post_load = cpu_common_post_load,
669 .fields = (VMStateField []) {
670 VMSTATE_UINT32(halted, CPUArchState),
671 VMSTATE_UINT32(interrupt_request, CPUArchState),
672 VMSTATE_END_OF_LIST()
673 }
674 };
675 #endif
676
677 CPUArchState *qemu_get_cpu(int cpu)
678 {
679 CPUArchState *env = first_cpu;
680
681 while (env) {
682 if (env->cpu_index == cpu)
683 break;
684 env = env->next_cpu;
685 }
686
687 return env;
688 }
689
690 void cpu_exec_init(CPUArchState *env)
691 {
692 #ifndef CONFIG_USER_ONLY
693 CPUState *cpu = ENV_GET_CPU(env);
694 #endif
695 CPUArchState **penv;
696 int cpu_index;
697
698 #if defined(CONFIG_USER_ONLY)
699 cpu_list_lock();
700 #endif
701 env->next_cpu = NULL;
702 penv = &first_cpu;
703 cpu_index = 0;
704 while (*penv != NULL) {
705 penv = &(*penv)->next_cpu;
706 cpu_index++;
707 }
708 env->cpu_index = cpu_index;
709 env->numa_node = 0;
710 QTAILQ_INIT(&env->breakpoints);
711 QTAILQ_INIT(&env->watchpoints);
712 #ifndef CONFIG_USER_ONLY
713 cpu->thread_id = qemu_get_thread_id();
714 #endif
715 *penv = env;
716 #if defined(CONFIG_USER_ONLY)
717 cpu_list_unlock();
718 #endif
719 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
720 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
721 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
722 cpu_save, cpu_load, env);
723 #endif
724 }
725
726 /* Allocate a new translation block. Flush the translation buffer if
727 too many translation blocks or too much generated code. */
728 static TranslationBlock *tb_alloc(target_ulong pc)
729 {
730 TranslationBlock *tb;
731
732 if (nb_tbs >= code_gen_max_blocks ||
733 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
734 return NULL;
735 tb = &tbs[nb_tbs++];
736 tb->pc = pc;
737 tb->cflags = 0;
738 return tb;
739 }
740
741 void tb_free(TranslationBlock *tb)
742 {
743 /* In practice this is mostly used for single-use temporary TBs.
744 Ignore the hard cases and just back up if this TB happens to
745 be the last one generated. */
746 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
747 code_gen_ptr = tb->tc_ptr;
748 nb_tbs--;
749 }
750 }
751
752 static inline void invalidate_page_bitmap(PageDesc *p)
753 {
754 if (p->code_bitmap) {
755 g_free(p->code_bitmap);
756 p->code_bitmap = NULL;
757 }
758 p->code_write_count = 0;
759 }
760
761 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
762
763 static void page_flush_tb_1 (int level, void **lp)
764 {
765 int i;
766
767 if (*lp == NULL) {
768 return;
769 }
770 if (level == 0) {
771 PageDesc *pd = *lp;
772 for (i = 0; i < L2_SIZE; ++i) {
773 pd[i].first_tb = NULL;
774 invalidate_page_bitmap(pd + i);
775 }
776 } else {
777 void **pp = *lp;
778 for (i = 0; i < L2_SIZE; ++i) {
779 page_flush_tb_1 (level - 1, pp + i);
780 }
781 }
782 }
783
784 static void page_flush_tb(void)
785 {
786 int i;
787 for (i = 0; i < V_L1_SIZE; i++) {
788 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
789 }
790 }
791
792 /* flush all the translation blocks */
793 /* XXX: tb_flush is currently not thread safe */
794 void tb_flush(CPUArchState *env1)
795 {
796 CPUArchState *env;
797 #if defined(DEBUG_FLUSH)
798 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
799 (unsigned long)(code_gen_ptr - code_gen_buffer),
800 nb_tbs, nb_tbs > 0 ?
801 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
802 #endif
803 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
804 cpu_abort(env1, "Internal error: code buffer overflow\n");
805
806 nb_tbs = 0;
807
808 for(env = first_cpu; env != NULL; env = env->next_cpu) {
809 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
810 }
811
812 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
813 page_flush_tb();
814
815 code_gen_ptr = code_gen_buffer;
816 /* XXX: flush processor icache at this point if cache flush is
817 expensive */
818 tb_flush_count++;
819 }
820
821 #ifdef DEBUG_TB_CHECK
822
823 static void tb_invalidate_check(target_ulong address)
824 {
825 TranslationBlock *tb;
826 int i;
827 address &= TARGET_PAGE_MASK;
828 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
829 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
830 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
831 address >= tb->pc + tb->size)) {
832 printf("ERROR invalidate: address=" TARGET_FMT_lx
833 " PC=%08lx size=%04x\n",
834 address, (long)tb->pc, tb->size);
835 }
836 }
837 }
838 }
839
840 /* verify that all the pages have correct rights for code */
841 static void tb_page_check(void)
842 {
843 TranslationBlock *tb;
844 int i, flags1, flags2;
845
846 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
847 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
848 flags1 = page_get_flags(tb->pc);
849 flags2 = page_get_flags(tb->pc + tb->size - 1);
850 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
851 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
852 (long)tb->pc, tb->size, flags1, flags2);
853 }
854 }
855 }
856 }
857
858 #endif
859
860 /* invalidate one TB */
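/* Unlink 'tb' from a singly linked list whose next pointer is stored at
   byte offset 'next_offset' inside each TranslationBlock, e.g.
   offsetof(TranslationBlock, phys_hash_next) for the physical hash
   chains. */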
861 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
862 int next_offset)
863 {
864 TranslationBlock *tb1;
865 for(;;) {
866 tb1 = *ptb;
867 if (tb1 == tb) {
868 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
869 break;
870 }
871 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
872 }
873 }
874
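/* The first_tb and page_next[] pointers are tagged in their low two bits
   with the index (0 or 1) of the page the link belongs to, see
   tb_alloc_page().  Unlink 'tb' from such a list. */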
875 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
876 {
877 TranslationBlock *tb1;
878 unsigned int n1;
879
880 for(;;) {
881 tb1 = *ptb;
882 n1 = (uintptr_t)tb1 & 3;
883 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
884 if (tb1 == tb) {
885 *ptb = tb1->page_next[n1];
886 break;
887 }
888 ptb = &tb1->page_next[n1];
889 }
890 }
891
892 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
893 {
894 TranslationBlock *tb1, **ptb;
895 unsigned int n1;
896
897 ptb = &tb->jmp_next[n];
898 tb1 = *ptb;
899 if (tb1) {
900 /* find tb(n) in circular list */
901 for(;;) {
902 tb1 = *ptb;
903 n1 = (uintptr_t)tb1 & 3;
904 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
905 if (n1 == n && tb1 == tb)
906 break;
907 if (n1 == 2) {
908 ptb = &tb1->jmp_first;
909 } else {
910 ptb = &tb1->jmp_next[n1];
911 }
912 }
913 /* now we can remove tb(n) from the list */
914 *ptb = tb->jmp_next[n];
915
916 tb->jmp_next[n] = NULL;
917 }
918 }
919
920 /* reset the jump entry 'n' of a TB so that it is not chained to
921 another TB */
922 static inline void tb_reset_jump(TranslationBlock *tb, int n)
923 {
924 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
925 }
926
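/* Remove a TB from the physical hash table, from the page lists of the
   page(s) it occupies (other than 'page_addr'), and from every CPU's
   tb_jmp_cache; finally unchain any TB whose direct jump still targeted
   it. */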
927 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
928 {
929 CPUArchState *env;
930 PageDesc *p;
931 unsigned int h, n1;
932 tb_page_addr_t phys_pc;
933 TranslationBlock *tb1, *tb2;
934
935 /* remove the TB from the hash list */
936 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
937 h = tb_phys_hash_func(phys_pc);
938 tb_remove(&tb_phys_hash[h], tb,
939 offsetof(TranslationBlock, phys_hash_next));
940
941 /* remove the TB from the page list */
942 if (tb->page_addr[0] != page_addr) {
943 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
944 tb_page_remove(&p->first_tb, tb);
945 invalidate_page_bitmap(p);
946 }
947 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
948 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
949 tb_page_remove(&p->first_tb, tb);
950 invalidate_page_bitmap(p);
951 }
952
953 tb_invalidated_flag = 1;
954
955 /* remove the TB from each CPU's tb_jmp_cache */
956 h = tb_jmp_cache_hash_func(tb->pc);
957 for(env = first_cpu; env != NULL; env = env->next_cpu) {
958 if (env->tb_jmp_cache[h] == tb)
959 env->tb_jmp_cache[h] = NULL;
960 }
961
962 /* remove this TB from the two jump lists */
963 tb_jmp_remove(tb, 0);
964 tb_jmp_remove(tb, 1);
965
966 /* remove any remaining jumps to this TB */
967 tb1 = tb->jmp_first;
968 for(;;) {
969 n1 = (uintptr_t)tb1 & 3;
970 if (n1 == 2)
971 break;
972 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
973 tb2 = tb1->jmp_next[n1];
974 tb_reset_jump(tb1, n1);
975 tb1->jmp_next[n1] = NULL;
976 tb1 = tb2;
977 }
978 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
979
980 tb_phys_invalidate_count++;
981 }
982
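/* Set 'len' consecutive bits starting at bit 'start' in the bitmap 'tab'
   (bit i lives in tab[i >> 3]).  For example, set_bits(tab, 3, 7) sets
   bits 3..9. */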
983 static inline void set_bits(uint8_t *tab, int start, int len)
984 {
985 int end, mask, end1;
986
987 end = start + len;
988 tab += start >> 3;
989 mask = 0xff << (start & 7);
990 if ((start & ~7) == (end & ~7)) {
991 if (start < end) {
992 mask &= ~(0xff << (end & 7));
993 *tab |= mask;
994 }
995 } else {
996 *tab++ |= mask;
997 start = (start + 8) & ~7;
998 end1 = end & ~7;
999 while (start < end1) {
1000 *tab++ = 0xff;
1001 start += 8;
1002 }
1003 if (start < end) {
1004 mask = ~(0xff << (end & 7));
1005 *tab |= mask;
1006 }
1007 }
1008 }
1009
1010 static void build_page_bitmap(PageDesc *p)
1011 {
1012 int n, tb_start, tb_end;
1013 TranslationBlock *tb;
1014
1015 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1016
1017 tb = p->first_tb;
1018 while (tb != NULL) {
1019 n = (uintptr_t)tb & 3;
1020 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1021 /* NOTE: this is subtle as a TB may span two physical pages */
1022 if (n == 0) {
1023 /* NOTE: tb_end may be after the end of the page, but
1024 it is not a problem */
1025 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1026 tb_end = tb_start + tb->size;
1027 if (tb_end > TARGET_PAGE_SIZE)
1028 tb_end = TARGET_PAGE_SIZE;
1029 } else {
1030 tb_start = 0;
1031 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1032 }
1033 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1034 tb = tb->page_next[n];
1035 }
1036 }
1037
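/* Translate a new block starting at 'pc'.  If the TB pool or the code
   buffer is exhausted, everything is flushed and the allocation is retried,
   which cannot fail.  The resulting TB is linked into the physical page(s)
   it covers; phys_page2 is -1 when the code does not cross a page
   boundary. */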
1038 TranslationBlock *tb_gen_code(CPUArchState *env,
1039 target_ulong pc, target_ulong cs_base,
1040 int flags, int cflags)
1041 {
1042 TranslationBlock *tb;
1043 uint8_t *tc_ptr;
1044 tb_page_addr_t phys_pc, phys_page2;
1045 target_ulong virt_page2;
1046 int code_gen_size;
1047
1048 phys_pc = get_page_addr_code(env, pc);
1049 tb = tb_alloc(pc);
1050 if (!tb) {
1051 /* flush must be done */
1052 tb_flush(env);
1053 /* cannot fail at this point */
1054 tb = tb_alloc(pc);
1055 /* Don't forget to invalidate previous TB info. */
1056 tb_invalidated_flag = 1;
1057 }
1058 tc_ptr = code_gen_ptr;
1059 tb->tc_ptr = tc_ptr;
1060 tb->cs_base = cs_base;
1061 tb->flags = flags;
1062 tb->cflags = cflags;
1063 cpu_gen_code(env, tb, &code_gen_size);
1064 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1065 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1066
1067 /* check next page if needed */
1068 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1069 phys_page2 = -1;
1070 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1071 phys_page2 = get_page_addr_code(env, virt_page2);
1072 }
1073 tb_link_page(tb, phys_pc, phys_page2);
1074 return tb;
1075 }
1076
1077 /*
1078 * Invalidate all TBs which intersect with the target physical address range
1079 * [start, end). NOTE: start and end may refer to *different* physical pages.
1080 * 'is_cpu_write_access' should be true if called from a real cpu write
1081 * access: the virtual CPU will exit the current TB if code is modified inside
1082 * this TB.
1083 */
1084 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1085 int is_cpu_write_access)
1086 {
1087 while (start < end) {
1088 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1089 start &= TARGET_PAGE_MASK;
1090 start += TARGET_PAGE_SIZE;
1091 }
1092 }
1093
1094 /*
1095 * Invalidate all TBs which intersect with the target physical address range
1096 * [start, end). NOTE: start and end must refer to the *same* physical page.
1097 * 'is_cpu_write_access' should be true if called from a real cpu write
1098 * access: the virtual CPU will exit the current TB if code is modified inside
1099 * this TB.
1100 */
1101 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1102 int is_cpu_write_access)
1103 {
1104 TranslationBlock *tb, *tb_next, *saved_tb;
1105 CPUArchState *env = cpu_single_env;
1106 tb_page_addr_t tb_start, tb_end;
1107 PageDesc *p;
1108 int n;
1109 #ifdef TARGET_HAS_PRECISE_SMC
1110 int current_tb_not_found = is_cpu_write_access;
1111 TranslationBlock *current_tb = NULL;
1112 int current_tb_modified = 0;
1113 target_ulong current_pc = 0;
1114 target_ulong current_cs_base = 0;
1115 int current_flags = 0;
1116 #endif /* TARGET_HAS_PRECISE_SMC */
1117
1118 p = page_find(start >> TARGET_PAGE_BITS);
1119 if (!p)
1120 return;
1121 if (!p->code_bitmap &&
1122 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1123 is_cpu_write_access) {
1124 /* build code bitmap */
1125 build_page_bitmap(p);
1126 }
1127
1128 /* we remove all the TBs in the range [start, end) */
1129 /* XXX: see if in some cases it could be faster to invalidate all the code */
1130 tb = p->first_tb;
1131 while (tb != NULL) {
1132 n = (uintptr_t)tb & 3;
1133 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1134 tb_next = tb->page_next[n];
1135 /* NOTE: this is subtle as a TB may span two physical pages */
1136 if (n == 0) {
1137 /* NOTE: tb_end may be after the end of the page, but
1138 it is not a problem */
1139 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1140 tb_end = tb_start + tb->size;
1141 } else {
1142 tb_start = tb->page_addr[1];
1143 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1144 }
1145 if (!(tb_end <= start || tb_start >= end)) {
1146 #ifdef TARGET_HAS_PRECISE_SMC
1147 if (current_tb_not_found) {
1148 current_tb_not_found = 0;
1149 current_tb = NULL;
1150 if (env->mem_io_pc) {
1151 /* now we have a real cpu fault */
1152 current_tb = tb_find_pc(env->mem_io_pc);
1153 }
1154 }
1155 if (current_tb == tb &&
1156 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1157 /* If we are modifying the current TB, we must stop
1158 its execution. We could be more precise by checking
1159 that the modification is after the current PC, but it
1160 would require a specialized function to partially
1161 restore the CPU state */
1162
1163 current_tb_modified = 1;
1164 cpu_restore_state(current_tb, env, env->mem_io_pc);
1165 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1166 &current_flags);
1167 }
1168 #endif /* TARGET_HAS_PRECISE_SMC */
1169 /* we need to do that to handle the case where a signal
1170 occurs while doing tb_phys_invalidate() */
1171 saved_tb = NULL;
1172 if (env) {
1173 saved_tb = env->current_tb;
1174 env->current_tb = NULL;
1175 }
1176 tb_phys_invalidate(tb, -1);
1177 if (env) {
1178 env->current_tb = saved_tb;
1179 if (env->interrupt_request && env->current_tb)
1180 cpu_interrupt(env, env->interrupt_request);
1181 }
1182 }
1183 tb = tb_next;
1184 }
1185 #if !defined(CONFIG_USER_ONLY)
1186 /* if no code remains, there is no need to keep using slow writes */
1187 if (!p->first_tb) {
1188 invalidate_page_bitmap(p);
1189 if (is_cpu_write_access) {
1190 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1191 }
1192 }
1193 #endif
1194 #ifdef TARGET_HAS_PRECISE_SMC
1195 if (current_tb_modified) {
1196 /* we generate a block containing just the instruction that
1197 modified the memory, which ensures that the instruction cannot
1198 modify itself */
1199 env->current_tb = NULL;
1200 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1201 cpu_resume_from_signal(env, NULL);
1202 }
1203 #endif
1204 }
1205
1206 /* len must be <= 8 and start must be a multiple of len */
1207 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1208 {
1209 PageDesc *p;
1210 int offset, b;
1211 #if 0
1212 if (1) {
1213 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1214 cpu_single_env->mem_io_vaddr, len,
1215 cpu_single_env->eip,
1216 cpu_single_env->eip +
1217 (intptr_t)cpu_single_env->segs[R_CS].base);
1218 }
1219 #endif
1220 p = page_find(start >> TARGET_PAGE_BITS);
1221 if (!p)
1222 return;
1223 if (p->code_bitmap) {
1224 offset = start & ~TARGET_PAGE_MASK;
1225 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1226 if (b & ((1 << len) - 1))
1227 goto do_invalidate;
1228 } else {
1229 do_invalidate:
1230 tb_invalidate_phys_page_range(start, start + len, 1);
1231 }
1232 }
1233
1234 #if !defined(CONFIG_SOFTMMU)
1235 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1236 uintptr_t pc, void *puc)
1237 {
1238 TranslationBlock *tb;
1239 PageDesc *p;
1240 int n;
1241 #ifdef TARGET_HAS_PRECISE_SMC
1242 TranslationBlock *current_tb = NULL;
1243 CPUArchState *env = cpu_single_env;
1244 int current_tb_modified = 0;
1245 target_ulong current_pc = 0;
1246 target_ulong current_cs_base = 0;
1247 int current_flags = 0;
1248 #endif
1249
1250 addr &= TARGET_PAGE_MASK;
1251 p = page_find(addr >> TARGET_PAGE_BITS);
1252 if (!p)
1253 return;
1254 tb = p->first_tb;
1255 #ifdef TARGET_HAS_PRECISE_SMC
1256 if (tb && pc != 0) {
1257 current_tb = tb_find_pc(pc);
1258 }
1259 #endif
1260 while (tb != NULL) {
1261 n = (uintptr_t)tb & 3;
1262 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1263 #ifdef TARGET_HAS_PRECISE_SMC
1264 if (current_tb == tb &&
1265 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1266 /* If we are modifying the current TB, we must stop
1267 its execution. We could be more precise by checking
1268 that the modification is after the current PC, but it
1269 would require a specialized function to partially
1270 restore the CPU state */
1271
1272 current_tb_modified = 1;
1273 cpu_restore_state(current_tb, env, pc);
1274 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1275 &current_flags);
1276 }
1277 #endif /* TARGET_HAS_PRECISE_SMC */
1278 tb_phys_invalidate(tb, addr);
1279 tb = tb->page_next[n];
1280 }
1281 p->first_tb = NULL;
1282 #ifdef TARGET_HAS_PRECISE_SMC
1283 if (current_tb_modified) {
1284 /* we generate a block containing just the instruction that
1285 modified the memory, which ensures that the instruction cannot
1286 modify itself */
1287 env->current_tb = NULL;
1288 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1289 cpu_resume_from_signal(env, puc);
1290 }
1291 #endif
1292 }
1293 #endif
1294
1295 /* add the TB to the target page and protect the page if necessary */
1296 static inline void tb_alloc_page(TranslationBlock *tb,
1297 unsigned int n, tb_page_addr_t page_addr)
1298 {
1299 PageDesc *p;
1300 #ifndef CONFIG_USER_ONLY
1301 bool page_already_protected;
1302 #endif
1303
1304 tb->page_addr[n] = page_addr;
1305 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1306 tb->page_next[n] = p->first_tb;
1307 #ifndef CONFIG_USER_ONLY
1308 page_already_protected = p->first_tb != NULL;
1309 #endif
1310 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1311 invalidate_page_bitmap(p);
1312
1313 #if defined(TARGET_HAS_SMC) || 1
1314
1315 #if defined(CONFIG_USER_ONLY)
1316 if (p->flags & PAGE_WRITE) {
1317 target_ulong addr;
1318 PageDesc *p2;
1319 int prot;
1320
1321 /* force the host page to be non-writable (writes will incur a
1322 page fault + mprotect overhead) */
1323 page_addr &= qemu_host_page_mask;
1324 prot = 0;
1325 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1326 addr += TARGET_PAGE_SIZE) {
1327
1328 p2 = page_find (addr >> TARGET_PAGE_BITS);
1329 if (!p2)
1330 continue;
1331 prot |= p2->flags;
1332 p2->flags &= ~PAGE_WRITE;
1333 }
1334 mprotect(g2h(page_addr), qemu_host_page_size,
1335 (prot & PAGE_BITS) & ~PAGE_WRITE);
1336 #ifdef DEBUG_TB_INVALIDATE
1337 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1338 page_addr);
1339 #endif
1340 }
1341 #else
1342 /* if some code is already present, then the pages are already
1343 protected. So we handle the case where only the first TB is
1344 allocated in a physical page */
1345 if (!page_already_protected) {
1346 tlb_protect_code(page_addr);
1347 }
1348 #endif
1349
1350 #endif /* TARGET_HAS_SMC */
1351 }
1352
1353 /* add a new TB and link it to the physical page tables. phys_page2 is
1354 (-1) to indicate that only one page contains the TB. */
1355 void tb_link_page(TranslationBlock *tb,
1356 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1357 {
1358 unsigned int h;
1359 TranslationBlock **ptb;
1360
1361 /* Grab the mmap lock to stop another thread invalidating this TB
1362 before we are done. */
1363 mmap_lock();
1364 /* add in the physical hash table */
1365 h = tb_phys_hash_func(phys_pc);
1366 ptb = &tb_phys_hash[h];
1367 tb->phys_hash_next = *ptb;
1368 *ptb = tb;
1369
1370 /* add in the page list */
1371 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1372 if (phys_page2 != -1)
1373 tb_alloc_page(tb, 1, phys_page2);
1374 else
1375 tb->page_addr[1] = -1;
1376
1377 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1378 tb->jmp_next[0] = NULL;
1379 tb->jmp_next[1] = NULL;
1380
1381 /* init original jump addresses */
1382 if (tb->tb_next_offset[0] != 0xffff)
1383 tb_reset_jump(tb, 0);
1384 if (tb->tb_next_offset[1] != 0xffff)
1385 tb_reset_jump(tb, 1);
1386
1387 #ifdef DEBUG_TB_CHECK
1388 tb_page_check();
1389 #endif
1390 mmap_unlock();
1391 }
1392
1393 #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
1394 /* check whether the given addr is in TCG generated code buffer or not */
1395 bool is_tcg_gen_code(uintptr_t tc_ptr)
1396 {
1397 /* This can be called during code generation, so code_gen_buffer_max_size
1398 is used instead of code_gen_ptr for the upper boundary check */
1399 return (tc_ptr >= (uintptr_t)code_gen_buffer &&
1400 tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size));
1401 }
1402 #endif
1403
1404 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1405 tb[1].tc_ptr. Return NULL if not found */
1406 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1407 {
1408 int m_min, m_max, m;
1409 uintptr_t v;
1410 TranslationBlock *tb;
1411
1412 if (nb_tbs <= 0)
1413 return NULL;
1414 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1415 tc_ptr >= (uintptr_t)code_gen_ptr) {
1416 return NULL;
1417 }
1418 /* binary search (cf Knuth) */
1419 m_min = 0;
1420 m_max = nb_tbs - 1;
1421 while (m_min <= m_max) {
1422 m = (m_min + m_max) >> 1;
1423 tb = &tbs[m];
1424 v = (uintptr_t)tb->tc_ptr;
1425 if (v == tc_ptr)
1426 return tb;
1427 else if (tc_ptr < v) {
1428 m_max = m - 1;
1429 } else {
1430 m_min = m + 1;
1431 }
1432 }
1433 return &tbs[m_max];
1434 }
1435
1436 static void tb_reset_jump_recursive(TranslationBlock *tb);
1437
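/* TBs that directly jump to a given TB are kept on a circular list rooted
   at that TB's jmp_first pointer; each element is a TB pointer tagged in
   its low bits with the jump slot (0 or 1), and the entry tagged with 2
   marks the owning TB itself.  Walk that structure to find the TB we jump
   to, unlink ourselves from its list, reset our generated jump, then
   recurse on that TB. */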
1438 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1439 {
1440 TranslationBlock *tb1, *tb_next, **ptb;
1441 unsigned int n1;
1442
1443 tb1 = tb->jmp_next[n];
1444 if (tb1 != NULL) {
1445 /* find head of list */
1446 for(;;) {
1447 n1 = (uintptr_t)tb1 & 3;
1448 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1449 if (n1 == 2)
1450 break;
1451 tb1 = tb1->jmp_next[n1];
1452 }
1453 /* we are now sure that tb jumps to tb1 */
1454 tb_next = tb1;
1455
1456 /* remove tb from the jmp_first list */
1457 ptb = &tb_next->jmp_first;
1458 for(;;) {
1459 tb1 = *ptb;
1460 n1 = (uintptr_t)tb1 & 3;
1461 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1462 if (n1 == n && tb1 == tb)
1463 break;
1464 ptb = &tb1->jmp_next[n1];
1465 }
1466 *ptb = tb->jmp_next[n];
1467 tb->jmp_next[n] = NULL;
1468
1469 /* reset the jump to the next TB in the generated code */
1470 tb_reset_jump(tb, n);
1471
1472 /* recursively reset the jumps of the TB we jumped to */
1473 tb_reset_jump_recursive(tb_next);
1474 }
1475 }
1476
1477 static void tb_reset_jump_recursive(TranslationBlock *tb)
1478 {
1479 tb_reset_jump_recursive2(tb, 0);
1480 tb_reset_jump_recursive2(tb, 1);
1481 }
1482
1483 #if defined(TARGET_HAS_ICE)
1484 #if defined(CONFIG_USER_ONLY)
1485 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1486 {
1487 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1488 }
1489 #else
1490 void tb_invalidate_phys_addr(hwaddr addr)
1491 {
1492 ram_addr_t ram_addr;
1493 MemoryRegionSection *section;
1494
1495 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1496 if (!(memory_region_is_ram(section->mr)
1497 || (section->mr->rom_device && section->mr->readable))) {
1498 return;
1499 }
1500 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1501 + memory_region_section_addr(section, addr);
1502 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1503 }
1504
1505 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1506 {
1507 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1508 (pc & ~TARGET_PAGE_MASK));
1509 }
1510 #endif
1511 #endif /* TARGET_HAS_ICE */
1512
1513 #if defined(CONFIG_USER_ONLY)
1514 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1515
1516 {
1517 }
1518
1519 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1520 int flags, CPUWatchpoint **watchpoint)
1521 {
1522 return -ENOSYS;
1523 }
1524 #else
1525 /* Add a watchpoint. */
1526 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1527 int flags, CPUWatchpoint **watchpoint)
1528 {
1529 target_ulong len_mask = ~(len - 1);
1530 CPUWatchpoint *wp;
1531
1532 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1533 if ((len & (len - 1)) || (addr & ~len_mask) ||
1534 len == 0 || len > TARGET_PAGE_SIZE) {
1535 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1536 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1537 return -EINVAL;
1538 }
1539 wp = g_malloc(sizeof(*wp));
1540
1541 wp->vaddr = addr;
1542 wp->len_mask = len_mask;
1543 wp->flags = flags;
1544
1545 /* keep all GDB-injected watchpoints in front */
1546 if (flags & BP_GDB)
1547 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1548 else
1549 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1550
1551 tlb_flush_page(env, addr);
1552
1553 if (watchpoint)
1554 *watchpoint = wp;
1555 return 0;
1556 }
1557
1558 /* Remove a specific watchpoint. */
1559 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1560 int flags)
1561 {
1562 target_ulong len_mask = ~(len - 1);
1563 CPUWatchpoint *wp;
1564
1565 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1566 if (addr == wp->vaddr && len_mask == wp->len_mask
1567 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1568 cpu_watchpoint_remove_by_ref(env, wp);
1569 return 0;
1570 }
1571 }
1572 return -ENOENT;
1573 }
1574
1575 /* Remove a specific watchpoint by reference. */
1576 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1577 {
1578 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1579
1580 tlb_flush_page(env, watchpoint->vaddr);
1581
1582 g_free(watchpoint);
1583 }
1584
1585 /* Remove all matching watchpoints. */
1586 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1587 {
1588 CPUWatchpoint *wp, *next;
1589
1590 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1591 if (wp->flags & mask)
1592 cpu_watchpoint_remove_by_ref(env, wp);
1593 }
1594 }
1595 #endif
1596
1597 /* Add a breakpoint. */
1598 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1599 CPUBreakpoint **breakpoint)
1600 {
1601 #if defined(TARGET_HAS_ICE)
1602 CPUBreakpoint *bp;
1603
1604 bp = g_malloc(sizeof(*bp));
1605
1606 bp->pc = pc;
1607 bp->flags = flags;
1608
1609 /* keep all GDB-injected breakpoints in front */
1610 if (flags & BP_GDB)
1611 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1612 else
1613 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1614
1615 breakpoint_invalidate(env, pc);
1616
1617 if (breakpoint)
1618 *breakpoint = bp;
1619 return 0;
1620 #else
1621 return -ENOSYS;
1622 #endif
1623 }
1624
1625 /* Remove a specific breakpoint. */
1626 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1627 {
1628 #if defined(TARGET_HAS_ICE)
1629 CPUBreakpoint *bp;
1630
1631 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1632 if (bp->pc == pc && bp->flags == flags) {
1633 cpu_breakpoint_remove_by_ref(env, bp);
1634 return 0;
1635 }
1636 }
1637 return -ENOENT;
1638 #else
1639 return -ENOSYS;
1640 #endif
1641 }
1642
1643 /* Remove a specific breakpoint by reference. */
1644 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1645 {
1646 #if defined(TARGET_HAS_ICE)
1647 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1648
1649 breakpoint_invalidate(env, breakpoint->pc);
1650
1651 g_free(breakpoint);
1652 #endif
1653 }
1654
1655 /* Remove all matching breakpoints. */
1656 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1657 {
1658 #if defined(TARGET_HAS_ICE)
1659 CPUBreakpoint *bp, *next;
1660
1661 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1662 if (bp->flags & mask)
1663 cpu_breakpoint_remove_by_ref(env, bp);
1664 }
1665 #endif
1666 }
1667
1668 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1669 CPU loop after each instruction */
1670 void cpu_single_step(CPUArchState *env, int enabled)
1671 {
1672 #if defined(TARGET_HAS_ICE)
1673 if (env->singlestep_enabled != enabled) {
1674 env->singlestep_enabled = enabled;
1675 if (kvm_enabled())
1676 kvm_update_guest_debug(env, 0);
1677 else {
1678 /* must flush all the translated code to avoid inconsistencies */
1679 /* XXX: only flush what is necessary */
1680 tb_flush(env);
1681 }
1682 }
1683 #endif
1684 }
1685
1686 static void cpu_unlink_tb(CPUArchState *env)
1687 {
1688 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1689 problem and hope the cpu will stop of its own accord. For userspace
1690 emulation this often isn't actually as bad as it sounds. Often
1691 signals are used primarily to interrupt blocking syscalls. */
1692 TranslationBlock *tb;
1693 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1694
1695 spin_lock(&interrupt_lock);
1696 tb = env->current_tb;
1697 /* if the cpu is currently executing code, we must unlink it and
1698 all the potentially executing TB */
1699 if (tb) {
1700 env->current_tb = NULL;
1701 tb_reset_jump_recursive(tb);
1702 }
1703 spin_unlock(&interrupt_lock);
1704 }
1705
1706 #ifndef CONFIG_USER_ONLY
1707 /* mask must never be zero, except for A20 change call */
1708 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1709 {
1710 CPUState *cpu = ENV_GET_CPU(env);
1711 int old_mask;
1712
1713 old_mask = env->interrupt_request;
1714 env->interrupt_request |= mask;
1715
1716 /*
1717 * If called from iothread context, wake the target cpu in
1718 * case it is halted.
1719 */
1720 if (!qemu_cpu_is_self(cpu)) {
1721 qemu_cpu_kick(cpu);
1722 return;
1723 }
1724
1725 if (use_icount) {
1726 env->icount_decr.u16.high = 0xffff;
1727 if (!can_do_io(env)
1728 && (mask & ~old_mask) != 0) {
1729 cpu_abort(env, "Raised interrupt while not in I/O function");
1730 }
1731 } else {
1732 cpu_unlink_tb(env);
1733 }
1734 }
1735
1736 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1737
1738 #else /* CONFIG_USER_ONLY */
1739
1740 void cpu_interrupt(CPUArchState *env, int mask)
1741 {
1742 env->interrupt_request |= mask;
1743 cpu_unlink_tb(env);
1744 }
1745 #endif /* CONFIG_USER_ONLY */
1746
1747 void cpu_reset_interrupt(CPUArchState *env, int mask)
1748 {
1749 env->interrupt_request &= ~mask;
1750 }
1751
1752 void cpu_exit(CPUArchState *env)
1753 {
1754 env->exit_request = 1;
1755 cpu_unlink_tb(env);
1756 }
1757
1758 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1759 {
1760 va_list ap;
1761 va_list ap2;
1762
1763 va_start(ap, fmt);
1764 va_copy(ap2, ap);
1765 fprintf(stderr, "qemu: fatal: ");
1766 vfprintf(stderr, fmt, ap);
1767 fprintf(stderr, "\n");
1768 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1769 if (qemu_log_enabled()) {
1770 qemu_log("qemu: fatal: ");
1771 qemu_log_vprintf(fmt, ap2);
1772 qemu_log("\n");
1773 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1774 qemu_log_flush();
1775 qemu_log_close();
1776 }
1777 va_end(ap2);
1778 va_end(ap);
1779 #if defined(CONFIG_USER_ONLY)
1780 {
1781 struct sigaction act;
1782 sigfillset(&act.sa_mask);
1783 act.sa_handler = SIG_DFL;
1784 sigaction(SIGABRT, &act, NULL);
1785 }
1786 #endif
1787 abort();
1788 }
1789
1790 CPUArchState *cpu_copy(CPUArchState *env)
1791 {
1792 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1793 CPUArchState *next_cpu = new_env->next_cpu;
1794 int cpu_index = new_env->cpu_index;
1795 #if defined(TARGET_HAS_ICE)
1796 CPUBreakpoint *bp;
1797 CPUWatchpoint *wp;
1798 #endif
1799
1800 memcpy(new_env, env, sizeof(CPUArchState));
1801
1802 /* Preserve chaining and index. */
1803 new_env->next_cpu = next_cpu;
1804 new_env->cpu_index = cpu_index;
1805
1806 /* Clone all break/watchpoints.
1807 Note: Once we support ptrace with hw-debug register access, make sure
1808 BP_CPU break/watchpoints are handled correctly on clone. */
1809 QTAILQ_INIT(&env->breakpoints);
1810 QTAILQ_INIT(&env->watchpoints);
1811 #if defined(TARGET_HAS_ICE)
1812 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1813 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1814 }
1815 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1816 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1817 wp->flags, NULL);
1818 }
1819 #endif
1820
1821 return new_env;
1822 }
1823
1824 #if !defined(CONFIG_USER_ONLY)
1825 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1826 {
1827 unsigned int i;
1828
1829 /* Discard jump cache entries for any tb which might potentially
1830 overlap the flushed page. */
1831 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1832 memset (&env->tb_jmp_cache[i], 0,
1833 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1834
1835 i = tb_jmp_cache_hash_page(addr);
1836 memset (&env->tb_jmp_cache[i], 0,
1837 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1838 }
1839
1840 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1841 uintptr_t length)
1842 {
1843 uintptr_t start1;
1844
1845 /* we modify the TLB cache so that the dirty bit will be set again
1846 when accessing the range */
1847 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1848 /* Check that we don't span multiple blocks - this would break the
1849 address comparisons below. */
1850 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1851 != (end - 1) - start) {
1852 abort();
1853 }
1854 cpu_tlb_reset_dirty_all(start1, length);
1855
1856 }
1857
1858 /* Note: start and end must be within the same ram block. */
1859 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1860 int dirty_flags)
1861 {
1862 uintptr_t length;
1863
1864 start &= TARGET_PAGE_MASK;
1865 end = TARGET_PAGE_ALIGN(end);
1866
1867 length = end - start;
1868 if (length == 0)
1869 return;
1870 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1871
1872 if (tcg_enabled()) {
1873 tlb_reset_dirty_range_all(start, end, length);
1874 }
1875 }
1876
1877 int cpu_physical_memory_set_dirty_tracking(int enable)
1878 {
1879 int ret = 0;
1880 in_migration = enable;
1881 return ret;
1882 }
1883
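/* Compute the iotlb value for a TLB entry.  For RAM the value is the ram
   address of the page combined with the notdirty or rom section index;
   for everything else it is the section index plus the offset within the
   section, so the I/O access path can recover the MemoryRegion.  Pages
   containing a relevant watchpoint are redirected to the watch section
   and flagged TLB_MMIO so accesses take the slow path. */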
1884 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1885 MemoryRegionSection *section,
1886 target_ulong vaddr,
1887 hwaddr paddr,
1888 int prot,
1889 target_ulong *address)
1890 {
1891 hwaddr iotlb;
1892 CPUWatchpoint *wp;
1893
1894 if (memory_region_is_ram(section->mr)) {
1895 /* Normal RAM. */
1896 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1897 + memory_region_section_addr(section, paddr);
1898 if (!section->readonly) {
1899 iotlb |= phys_section_notdirty;
1900 } else {
1901 iotlb |= phys_section_rom;
1902 }
1903 } else {
1904 /* IO handlers are currently passed a physical address.
1905 It would be nice to pass an offset from the base address
1906 of that region. This would avoid having to special case RAM,
1907 and avoid full address decoding in every device.
1908 We can't use the high bits of pd for this because
1909 IO_MEM_ROMD uses these as a ram address. */
1910 iotlb = section - phys_sections;
1911 iotlb += memory_region_section_addr(section, paddr);
1912 }
1913
1914 /* Make accesses to pages with watchpoints go via the
1915 watchpoint trap routines. */
1916 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1917 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1918 /* Avoid trapping reads of pages with a write breakpoint. */
1919 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1920 iotlb = phys_section_watch + paddr;
1921 *address |= TLB_MMIO;
1922 break;
1923 }
1924 }
1925 }
1926
1927 return iotlb;
1928 }
1929
1930 #else
1931 /*
1932 * Walks guest process memory "regions" one by one
1933 * and calls callback function 'fn' for each region.
1934 */
1935
1936 struct walk_memory_regions_data
1937 {
1938 walk_memory_regions_fn fn;
1939 void *priv;
1940 uintptr_t start;
1941 int prot;
1942 };
1943
1944 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1945 abi_ulong end, int new_prot)
1946 {
1947 if (data->start != -1ul) {
1948 int rc = data->fn(data->priv, data->start, end, data->prot);
1949 if (rc != 0) {
1950 return rc;
1951 }
1952 }
1953
1954 data->start = (new_prot ? end : -1ul);
1955 data->prot = new_prot;
1956
1957 return 0;
1958 }
1959
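/* Recursive helper for walk_memory_regions(): descend one level of the
   l1_map, accumulating runs of consecutive pages with identical
   protection bits and reporting each completed run through
   walk_memory_regions_end(). */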
1960 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1961 abi_ulong base, int level, void **lp)
1962 {
1963 abi_ulong pa;
1964 int i, rc;
1965
1966 if (*lp == NULL) {
1967 return walk_memory_regions_end(data, base, 0);
1968 }
1969
1970 if (level == 0) {
1971 PageDesc *pd = *lp;
1972 for (i = 0; i < L2_SIZE; ++i) {
1973 int prot = pd[i].flags;
1974
1975 pa = base | (i << TARGET_PAGE_BITS);
1976 if (prot != data->prot) {
1977 rc = walk_memory_regions_end(data, pa, prot);
1978 if (rc != 0) {
1979 return rc;
1980 }
1981 }
1982 }
1983 } else {
1984 void **pp = *lp;
1985 for (i = 0; i < L2_SIZE; ++i) {
1986 pa = base | ((abi_ulong)i <<
1987 (TARGET_PAGE_BITS + L2_BITS * level));
1988 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1989 if (rc != 0) {
1990 return rc;
1991 }
1992 }
1993 }
1994
1995 return 0;
1996 }
1997
1998 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1999 {
2000 struct walk_memory_regions_data data;
2001 uintptr_t i;
2002
2003 data.fn = fn;
2004 data.priv = priv;
2005 data.start = -1ul;
2006 data.prot = 0;
2007
2008 for (i = 0; i < V_L1_SIZE; i++) {
2009 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2010 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2011 if (rc != 0) {
2012 return rc;
2013 }
2014 }
2015
2016 return walk_memory_regions_end(&data, 0, 0);
2017 }
2018
2019 static int dump_region(void *priv, abi_ulong start,
2020 abi_ulong end, unsigned long prot)
2021 {
2022 FILE *f = (FILE *)priv;
2023
2024 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2025 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2026 start, end, end - start,
2027 ((prot & PAGE_READ) ? 'r' : '-'),
2028 ((prot & PAGE_WRITE) ? 'w' : '-'),
2029 ((prot & PAGE_EXEC) ? 'x' : '-'));
2030
2031 return 0;
2032 }
2033
2034 /* dump memory mappings */
2035 void page_dump(FILE *f)
2036 {
2037 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2038 "start", "end", "size", "prot");
2039 walk_memory_regions(f, dump_region);
2040 }
2041
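/* page_dump() above is one consumer of walk_memory_regions(); as a rough
 * sketch of the callback contract (one call per contiguous range with
 * identical protection, non-zero return aborts the walk), a hypothetical
 * helper that totals executable bytes could look like this: */
#if 0 /* illustrative only */
static int count_exec_bytes(void *priv, abi_ulong start,
                            abi_ulong end, unsigned long prot)
{
    abi_ulong *total = priv;

    if (prot & PAGE_EXEC) {
        *total += end - start;      /* accumulate executable bytes */
    }
    return 0;                       /* keep walking */
}

static abi_ulong total_exec_bytes(void)
{
    abi_ulong total = 0;

    walk_memory_regions(&total, count_exec_bytes);
    return total;
}
#endif
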
2042 int page_get_flags(target_ulong address)
2043 {
2044 PageDesc *p;
2045
2046 p = page_find(address >> TARGET_PAGE_BITS);
2047 if (!p)
2048 return 0;
2049 return p->flags;
2050 }
2051
2052 /* Modify the flags of a page and invalidate the code if necessary.
2053 The flag PAGE_WRITE_ORG is positioned automatically depending
2054 on PAGE_WRITE. The mmap_lock should already be held. */
2055 void page_set_flags(target_ulong start, target_ulong end, int flags)
2056 {
2057 target_ulong addr, len;
2058
2059 /* This function should never be called with addresses outside the
2060 guest address space. If this assert fires, it probably indicates
2061 a missing call to h2g_valid. */
2062 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2063 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2064 #endif
2065 assert(start < end);
2066
2067 start = start & TARGET_PAGE_MASK;
2068 end = TARGET_PAGE_ALIGN(end);
2069
2070 if (flags & PAGE_WRITE) {
2071 flags |= PAGE_WRITE_ORG;
2072 }
2073
2074 for (addr = start, len = end - start;
2075 len != 0;
2076 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2077 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2078
2079 /* If the write protection bit is set, then we invalidate
2080 the code inside. */
2081 if (!(p->flags & PAGE_WRITE) &&
2082 (flags & PAGE_WRITE) &&
2083 p->first_tb) {
2084 tb_invalidate_phys_page(addr, 0, NULL);
2085 }
2086 p->flags = flags;
2087 }
2088 }
2089
2090 int page_check_range(target_ulong start, target_ulong len, int flags)
2091 {
2092 PageDesc *p;
2093 target_ulong end;
2094 target_ulong addr;
2095
2096 /* This function should never be called with addresses outside the
2097 guest address space. If this assert fires, it probably indicates
2098 a missing call to h2g_valid. */
2099 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2100 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2101 #endif
2102
2103 if (len == 0) {
2104 return 0;
2105 }
2106 if (start + len - 1 < start) {
2107 /* We've wrapped around. */
2108 return -1;
2109 }
2110
2111 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2112 start = start & TARGET_PAGE_MASK;
2113
2114 for (addr = start, len = end - start;
2115 len != 0;
2116 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2117 p = page_find(addr >> TARGET_PAGE_BITS);
2118 if (!p)
2119 return -1;
2120 if (!(p->flags & PAGE_VALID))
2121 return -1;
2122
2123 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2124 return -1;
2125 if (flags & PAGE_WRITE) {
2126 if (!(p->flags & PAGE_WRITE_ORG))
2127 return -1;
2128 /* unprotect the page if it was put read-only because it
2129 contains translated code */
2130 if (!(p->flags & PAGE_WRITE)) {
2131 if (!page_unprotect(addr, 0, NULL))
2132 return -1;
2133 }
2134 return 0;
2135 }
2136 }
2137 return 0;
2138 }
2139
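/* A rough sketch of how the user-mode page flags are used in practice
 * (linux-user style; TARGET_EFAULT and the variables are only examples):
 * the mmap path records the protection of new mappings, and syscall
 * emulation validates guest buffers before touching them. */
#if 0 /* illustrative only */
/* after creating a guest mapping of 'len' bytes at 'start': */
page_set_flags(start, start + len, prot | PAGE_VALID);

/* before reading a guest buffer passed to a syscall: */
if (page_check_range(guest_addr, size, PAGE_READ) < 0) {
    return -TARGET_EFAULT;
}
#endif
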
2140 /* called from signal handler: invalidate the code and unprotect the
2141 page. Return TRUE if the fault was successfully handled. */
2142 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2143 {
2144 unsigned int prot;
2145 PageDesc *p;
2146 target_ulong host_start, host_end, addr;
2147
2148 /* Technically this isn't safe inside a signal handler. However we
2149 know this only ever happens in a synchronous SEGV handler, so in
2150 practice it seems to be ok. */
2151 mmap_lock();
2152
2153 p = page_find(address >> TARGET_PAGE_BITS);
2154 if (!p) {
2155 mmap_unlock();
2156 return 0;
2157 }
2158
2159 /* if the page was really writable, then we change its
2160 protection back to writable */
2161 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2162 host_start = address & qemu_host_page_mask;
2163 host_end = host_start + qemu_host_page_size;
2164
2165 prot = 0;
2166 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2167 p = page_find(addr >> TARGET_PAGE_BITS);
2168 p->flags |= PAGE_WRITE;
2169 prot |= p->flags;
2170
2171 /* and since the content will be modified, we must invalidate
2172 the corresponding translated code. */
2173 tb_invalidate_phys_page(addr, pc, puc);
2174 #ifdef DEBUG_TB_CHECK
2175 tb_invalidate_check(addr);
2176 #endif
2177 }
2178 mprotect((void *)g2h(host_start), qemu_host_page_size,
2179 prot & PAGE_BITS);
2180
2181 mmap_unlock();
2182 return 1;
2183 }
2184 mmap_unlock();
2185 return 0;
2186 }
2187 #endif /* defined(CONFIG_USER_ONLY) */
2188
2189 #if !defined(CONFIG_USER_ONLY)
2190
2191 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2192 typedef struct subpage_t {
2193 MemoryRegion iomem;
2194 hwaddr base;
2195 uint16_t sub_section[TARGET_PAGE_SIZE];
2196 } subpage_t;
2197
2198 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2199 uint16_t section);
2200 static subpage_t *subpage_init(hwaddr base);
2201 static void destroy_page_desc(uint16_t section_index)
2202 {
2203 MemoryRegionSection *section = &phys_sections[section_index];
2204 MemoryRegion *mr = section->mr;
2205
2206 if (mr->subpage) {
2207 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2208 memory_region_destroy(&subpage->iomem);
2209 g_free(subpage);
2210 }
2211 }
2212
2213 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2214 {
2215 unsigned i;
2216 PhysPageEntry *p;
2217
2218 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2219 return;
2220 }
2221
2222 p = phys_map_nodes[lp->ptr];
2223 for (i = 0; i < L2_SIZE; ++i) {
2224 if (!p[i].is_leaf) {
2225 destroy_l2_mapping(&p[i], level - 1);
2226 } else {
2227 destroy_page_desc(p[i].ptr);
2228 }
2229 }
2230 lp->is_leaf = 0;
2231 lp->ptr = PHYS_MAP_NODE_NIL;
2232 }
2233
2234 static void destroy_all_mappings(AddressSpaceDispatch *d)
2235 {
2236 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2237 phys_map_nodes_reset();
2238 }
2239
2240 static uint16_t phys_section_add(MemoryRegionSection *section)
2241 {
2242 if (phys_sections_nb == phys_sections_nb_alloc) {
2243 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2244 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2245 phys_sections_nb_alloc);
2246 }
2247 phys_sections[phys_sections_nb] = *section;
2248 return phys_sections_nb++;
2249 }
2250
2251 static void phys_sections_clear(void)
2252 {
2253 phys_sections_nb = 0;
2254 }
2255
2256 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2257 {
2258 subpage_t *subpage;
2259 hwaddr base = section->offset_within_address_space
2260 & TARGET_PAGE_MASK;
2261 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2262 MemoryRegionSection subsection = {
2263 .offset_within_address_space = base,
2264 .size = TARGET_PAGE_SIZE,
2265 };
2266 hwaddr start, end;
2267
2268 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2269
2270 if (!(existing->mr->subpage)) {
2271 subpage = subpage_init(base);
2272 subsection.mr = &subpage->iomem;
2273 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2274 phys_section_add(&subsection));
2275 } else {
2276 subpage = container_of(existing->mr, subpage_t, iomem);
2277 }
2278 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2279 end = start + section->size - 1;
2280 subpage_register(subpage, start, end, phys_section_add(section));
2281 }
2282
2283
2284 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2285 {
2286 hwaddr start_addr = section->offset_within_address_space;
2287 ram_addr_t size = section->size;
2288 hwaddr addr;
2289 uint16_t section_index = phys_section_add(section);
2290
2291 assert(size);
2292
2293 addr = start_addr;
2294 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2295 section_index);
2296 }
2297
2298 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2299 {
2300 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2301 MemoryRegionSection now = *section, remain = *section;
2302
2303 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2304 || (now.size < TARGET_PAGE_SIZE)) {
2305 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2306 - now.offset_within_address_space,
2307 now.size);
2308 register_subpage(d, &now);
2309 remain.size -= now.size;
2310 remain.offset_within_address_space += now.size;
2311 remain.offset_within_region += now.size;
2312 }
2313 while (remain.size >= TARGET_PAGE_SIZE) {
2314 now = remain;
2315 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2316 now.size = TARGET_PAGE_SIZE;
2317 register_subpage(d, &now);
2318 } else {
2319 now.size &= TARGET_PAGE_MASK;
2320 register_multipage(d, &now);
2321 }
2322 remain.size -= now.size;
2323 remain.offset_within_address_space += now.size;
2324 remain.offset_within_region += now.size;
2325 }
2326 now = remain;
2327 if (now.size) {
2328 register_subpage(d, &now);
2329 }
2330 }
2331
2332 void qemu_flush_coalesced_mmio_buffer(void)
2333 {
2334 if (kvm_enabled())
2335 kvm_flush_coalesced_mmio_buffer();
2336 }
2337
2338 #if defined(__linux__) && !defined(TARGET_S390X)
2339
2340 #include <sys/vfs.h>
2341
2342 #define HUGETLBFS_MAGIC 0x958458f6
2343
2344 static long gethugepagesize(const char *path)
2345 {
2346 struct statfs fs;
2347 int ret;
2348
2349 do {
2350 ret = statfs(path, &fs);
2351 } while (ret != 0 && errno == EINTR);
2352
2353 if (ret != 0) {
2354 perror(path);
2355 return 0;
2356 }
2357
2358 if (fs.f_type != HUGETLBFS_MAGIC)
2359 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2360
2361 return fs.f_bsize;
2362 }
2363
2364 static void *file_ram_alloc(RAMBlock *block,
2365 ram_addr_t memory,
2366 const char *path)
2367 {
2368 char *filename;
2369 void *area;
2370 int fd;
2371 #ifdef MAP_POPULATE
2372 int flags;
2373 #endif
2374 unsigned long hpagesize;
2375
2376 hpagesize = gethugepagesize(path);
2377 if (!hpagesize) {
2378 return NULL;
2379 }
2380
2381 if (memory < hpagesize) {
2382 return NULL;
2383 }
2384
2385 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2386 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2387 return NULL;
2388 }
2389
2390 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2391 return NULL;
2392 }
2393
2394 fd = mkstemp(filename);
2395 if (fd < 0) {
2396 perror("unable to create backing store for hugepages");
2397 free(filename);
2398 return NULL;
2399 }
2400 unlink(filename);
2401 free(filename);
2402
2403 memory = (memory + hpagesize - 1) & ~(hpagesize - 1);
2404
2405 /*
2406 * ftruncate is not supported by hugetlbfs in older
2407 * hosts, so don't bother bailing out on errors.
2408 * If anything goes wrong with it under other filesystems,
2409 * mmap will fail.
2410 */
2411 if (ftruncate(fd, memory))
2412 perror("ftruncate");
2413
2414 #ifdef MAP_POPULATE
2415 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2416 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2417 * to sidestep this quirk.
2418 */
2419 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2420 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2421 #else
2422 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2423 #endif
2424 if (area == MAP_FAILED) {
2425 perror("file_ram_alloc: can't mmap RAM pages");
2426 close(fd);
2427 return NULL;
2428 }
2429 block->fd = fd;
2430 return area;
2431 }
2432 #endif
2433
2434 static ram_addr_t find_ram_offset(ram_addr_t size)
2435 {
2436 RAMBlock *block, *next_block;
2437 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2438
2439 if (QLIST_EMPTY(&ram_list.blocks))
2440 return 0;
2441
2442 QLIST_FOREACH(block, &ram_list.blocks, next) {
2443 ram_addr_t end, next = RAM_ADDR_MAX;
2444
2445 end = block->offset + block->length;
2446
2447 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2448 if (next_block->offset >= end) {
2449 next = MIN(next, next_block->offset);
2450 }
2451 }
2452 if (next - end >= size && next - end < mingap) {
2453 offset = end;
2454 mingap = next - end;
2455 }
2456 }
2457
2458 if (offset == RAM_ADDR_MAX) {
2459 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2460 (uint64_t)size);
2461 abort();
2462 }
2463
2464 return offset;
2465 }
2466
2467 ram_addr_t last_ram_offset(void)
2468 {
2469 RAMBlock *block;
2470 ram_addr_t last = 0;
2471
2472 QLIST_FOREACH(block, &ram_list.blocks, next)
2473 last = MAX(last, block->offset + block->length);
2474
2475 return last;
2476 }
2477
2478 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2479 {
2480 int ret;
2481 QemuOpts *machine_opts;
2482
2483 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
2484 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2485 if (machine_opts &&
2486 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2487 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2488 if (ret) {
2489 perror("qemu_madvise");
2490 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2491 "but dump_guest_core=off specified\n");
2492 }
2493 }
2494 }
2495
2496 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2497 {
2498 RAMBlock *new_block, *block;
2499
2500 new_block = NULL;
2501 QLIST_FOREACH(block, &ram_list.blocks, next) {
2502 if (block->offset == addr) {
2503 new_block = block;
2504 break;
2505 }
2506 }
2507 assert(new_block);
2508 assert(!new_block->idstr[0]);
2509
2510 if (dev) {
2511 char *id = qdev_get_dev_path(dev);
2512 if (id) {
2513 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2514 g_free(id);
2515 }
2516 }
2517 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2518
2519 QLIST_FOREACH(block, &ram_list.blocks, next) {
2520 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2521 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2522 new_block->idstr);
2523 abort();
2524 }
2525 }
2526 }
2527
2528 static int memory_try_enable_merging(void *addr, size_t len)
2529 {
2530 QemuOpts *opts;
2531
2532 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2533 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2534 /* disabled by the user */
2535 return 0;
2536 }
2537
2538 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2539 }
2540
2541 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2542 MemoryRegion *mr)
2543 {
2544 RAMBlock *new_block;
2545
2546 size = TARGET_PAGE_ALIGN(size);
2547 new_block = g_malloc0(sizeof(*new_block));
2548
2549 new_block->mr = mr;
2550 new_block->offset = find_ram_offset(size);
2551 if (host) {
2552 new_block->host = host;
2553 new_block->flags |= RAM_PREALLOC_MASK;
2554 } else {
2555 if (mem_path) {
2556 #if defined (__linux__) && !defined(TARGET_S390X)
2557 new_block->host = file_ram_alloc(new_block, size, mem_path);
2558 if (!new_block->host) {
2559 new_block->host = qemu_vmalloc(size);
2560 memory_try_enable_merging(new_block->host, size);
2561 }
2562 #else
2563 fprintf(stderr, "-mem-path option unsupported\n");
2564 exit(1);
2565 #endif
2566 } else {
2567 if (xen_enabled()) {
2568 xen_ram_alloc(new_block->offset, size, mr);
2569 } else if (kvm_enabled()) {
2570 /* some s390/kvm configurations have special constraints */
2571 new_block->host = kvm_vmalloc(size);
2572 } else {
2573 new_block->host = qemu_vmalloc(size);
2574 }
2575 memory_try_enable_merging(new_block->host, size);
2576 }
2577 }
2578 new_block->length = size;
2579
2580 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2581
2582 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2583 last_ram_offset() >> TARGET_PAGE_BITS);
2584 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2585 0, size >> TARGET_PAGE_BITS);
2586 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2587
2588 qemu_ram_setup_dump(new_block->host, size);
2589 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2590
2591 if (kvm_enabled())
2592 kvm_setup_guest_memory(new_block->host, size);
2593
2594 return new_block->offset;
2595 }
2596
2597 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2598 {
2599 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2600 }
2601
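/* Boards and devices normally do not call qemu_ram_alloc() directly; they go
 * through the MemoryRegion API, which ends up here.  A typical pattern for
 * this vintage of the API is sketched below ("pc.ram" and ram_size are just
 * example values): */
#if 0 /* illustrative only */
MemoryRegion *ram = g_malloc(sizeof(*ram));

memory_region_init_ram(ram, "pc.ram", ram_size);   /* calls qemu_ram_alloc() */
vmstate_register_ram_global(ram);                  /* name the block for migration */
memory_region_add_subregion(get_system_memory(), 0, ram);
#endif
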
2602 void qemu_ram_free_from_ptr(ram_addr_t addr)
2603 {
2604 RAMBlock *block;
2605
2606 QLIST_FOREACH(block, &ram_list.blocks, next) {
2607 if (addr == block->offset) {
2608 QLIST_REMOVE(block, next);
2609 g_free(block);
2610 return;
2611 }
2612 }
2613 }
2614
2615 void qemu_ram_free(ram_addr_t addr)
2616 {
2617 RAMBlock *block;
2618
2619 QLIST_FOREACH(block, &ram_list.blocks, next) {
2620 if (addr == block->offset) {
2621 QLIST_REMOVE(block, next);
2622 if (block->flags & RAM_PREALLOC_MASK) {
2623 ;
2624 } else if (mem_path) {
2625 #if defined (__linux__) && !defined(TARGET_S390X)
2626 if (block->fd) {
2627 munmap(block->host, block->length);
2628 close(block->fd);
2629 } else {
2630 qemu_vfree(block->host);
2631 }
2632 #else
2633 abort();
2634 #endif
2635 } else {
2636 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2637 munmap(block->host, block->length);
2638 #else
2639 if (xen_enabled()) {
2640 xen_invalidate_map_cache_entry(block->host);
2641 } else {
2642 qemu_vfree(block->host);
2643 }
2644 #endif
2645 }
2646 g_free(block);
2647 return;
2648 }
2649 }
2650
2651 }
2652
2653 #ifndef _WIN32
2654 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2655 {
2656 RAMBlock *block;
2657 ram_addr_t offset;
2658 int flags;
2659 void *area, *vaddr;
2660
2661 QLIST_FOREACH(block, &ram_list.blocks, next) {
2662 offset = addr - block->offset;
2663 if (offset < block->length) {
2664 vaddr = block->host + offset;
2665 if (block->flags & RAM_PREALLOC_MASK) {
2666 ;
2667 } else {
2668 flags = MAP_FIXED;
2669 munmap(vaddr, length);
2670 if (mem_path) {
2671 #if defined(__linux__) && !defined(TARGET_S390X)
2672 if (block->fd) {
2673 #ifdef MAP_POPULATE
2674 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2675 MAP_PRIVATE;
2676 #else
2677 flags |= MAP_PRIVATE;
2678 #endif
2679 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2680 flags, block->fd, offset);
2681 } else {
2682 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2683 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2684 flags, -1, 0);
2685 }
2686 #else
2687 abort();
2688 #endif
2689 } else {
2690 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2691 flags |= MAP_SHARED | MAP_ANONYMOUS;
2692 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2693 flags, -1, 0);
2694 #else
2695 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2696 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2697 flags, -1, 0);
2698 #endif
2699 }
2700 if (area != vaddr) {
2701 fprintf(stderr, "Could not remap addr: "
2702 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2703 length, addr);
2704 exit(1);
2705 }
2706 memory_try_enable_merging(vaddr, length);
2707 qemu_ram_setup_dump(vaddr, length);
2708 }
2709 return;
2710 }
2711 }
2712 }
2713 #endif /* !_WIN32 */
2714
2715 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2716 With the exception of the softmmu code in this file, this should
2717 only be used for local memory (e.g. video RAM) that the device owns
2718 and knows it will not access beyond the end of the block.
2719
2720 It should not be used for general purpose DMA.
2721 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2722 */
2723 void *qemu_get_ram_ptr(ram_addr_t addr)
2724 {
2725 RAMBlock *block;
2726
2727 QLIST_FOREACH(block, &ram_list.blocks, next) {
2728 if (addr - block->offset < block->length) {
2729 /* Move this entry to the start of the list. */
2730 if (block != QLIST_FIRST(&ram_list.blocks)) {
2731 QLIST_REMOVE(block, next);
2732 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2733 }
2734 if (xen_enabled()) {
2735 /* We need to check if the requested address is in the RAM
2736 * because we don't want to map the entire memory in QEMU.
2737 * In that case just map until the end of the page.
2738 */
2739 if (block->offset == 0) {
2740 return xen_map_cache(addr, 0, 0);
2741 } else if (block->host == NULL) {
2742 block->host =
2743 xen_map_cache(block->offset, block->length, 1);
2744 }
2745 }
2746 return block->host + (addr - block->offset);
2747 }
2748 }
2749
2750 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2751 abort();
2752
2753 return NULL;
2754 }
2755
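/* To illustrate the restriction documented above: caching the pointer is
 * fine for a block the device itself allocated and whose bounds it knows,
 * while guest-programmed DMA addresses should go through the slow path
 * (vga_ram_offset, vga_ram_size and the dma_* variables are hypothetical). */
#if 0 /* illustrative only */
/* OK: a display device clearing its own video RAM block */
uint8_t *vram = qemu_get_ram_ptr(vga_ram_offset);
memset(vram, 0, vga_ram_size);

/* Not OK for arbitrary guest-supplied DMA addresses; use instead: */
cpu_physical_memory_rw(dma_addr, dma_buf, dma_len, 1 /* is_write */);
#endif
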
2756 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2757 * Same as qemu_get_ram_ptr, but it does not reorder the RAM block list.
2758 */
2759 void *qemu_safe_ram_ptr(ram_addr_t addr)
2760 {
2761 RAMBlock *block;
2762
2763 QLIST_FOREACH(block, &ram_list.blocks, next) {
2764 if (addr - block->offset < block->length) {
2765 if (xen_enabled()) {
2766 /* We need to check if the requested address is in the RAM
2767 * because we don't want to map the entire memory in QEMU.
2768 * In that case just map until the end of the page.
2769 */
2770 if (block->offset == 0) {
2771 return xen_map_cache(addr, 0, 0);
2772 } else if (block->host == NULL) {
2773 block->host =
2774 xen_map_cache(block->offset, block->length, 1);
2775 }
2776 }
2777 return block->host + (addr - block->offset);
2778 }
2779 }
2780
2781 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2782 abort();
2783
2784 return NULL;
2785 }
2786
2787 /* Return a host pointer to guest RAM. Similar to qemu_get_ram_ptr,
2788 * but takes a size argument. */
2789 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2790 {
2791 if (*size == 0) {
2792 return NULL;
2793 }
2794 if (xen_enabled()) {
2795 return xen_map_cache(addr, *size, 1);
2796 } else {
2797 RAMBlock *block;
2798
2799 QLIST_FOREACH(block, &ram_list.blocks, next) {
2800 if (addr - block->offset < block->length) {
2801 if (addr - block->offset + *size > block->length)
2802 *size = block->length - addr + block->offset;
2803 return block->host + (addr - block->offset);
2804 }
2805 }
2806
2807 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2808 abort();
2809 }
2810 }
2811
2812 void qemu_put_ram_ptr(void *addr)
2813 {
2814 trace_qemu_put_ram_ptr(addr);
2815 }
2816
2817 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2818 {
2819 RAMBlock *block;
2820 uint8_t *host = ptr;
2821
2822 if (xen_enabled()) {
2823 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2824 return 0;
2825 }
2826
2827 QLIST_FOREACH(block, &ram_list.blocks, next) {
2828 /* This case occurs when the block is not mapped. */
2829 if (block->host == NULL) {
2830 continue;
2831 }
2832 if (host - block->host < block->length) {
2833 *ram_addr = block->offset + (host - block->host);
2834 return 0;
2835 }
2836 }
2837
2838 return -1;
2839 }
2840
2841 /* Some of the softmmu routines need to translate from a host pointer
2842 (typically a TLB entry) back to a ram offset. */
2843 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2844 {
2845 ram_addr_t ram_addr;
2846
2847 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2848 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2849 abort();
2850 }
2851 return ram_addr;
2852 }
2853
2854 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2855 unsigned size)
2856 {
2857 #ifdef DEBUG_UNASSIGNED
2858 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2859 #endif
2860 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2861 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2862 #endif
2863 return 0;
2864 }
2865
2866 static void unassigned_mem_write(void *opaque, hwaddr addr,
2867 uint64_t val, unsigned size)
2868 {
2869 #ifdef DEBUG_UNASSIGNED
2870 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2871 #endif
2872 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2873 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2874 #endif
2875 }
2876
2877 static const MemoryRegionOps unassigned_mem_ops = {
2878 .read = unassigned_mem_read,
2879 .write = unassigned_mem_write,
2880 .endianness = DEVICE_NATIVE_ENDIAN,
2881 };
2882
2883 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2884 unsigned size)
2885 {
2886 abort();
2887 }
2888
2889 static void error_mem_write(void *opaque, hwaddr addr,
2890 uint64_t value, unsigned size)
2891 {
2892 abort();
2893 }
2894
2895 static const MemoryRegionOps error_mem_ops = {
2896 .read = error_mem_read,
2897 .write = error_mem_write,
2898 .endianness = DEVICE_NATIVE_ENDIAN,
2899 };
2900
2901 static const MemoryRegionOps rom_mem_ops = {
2902 .read = error_mem_read,
2903 .write = unassigned_mem_write,
2904 .endianness = DEVICE_NATIVE_ENDIAN,
2905 };
2906
2907 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2908 uint64_t val, unsigned size)
2909 {
2910 int dirty_flags;
2911 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2912 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2913 #if !defined(CONFIG_USER_ONLY)
2914 tb_invalidate_phys_page_fast(ram_addr, size);
2915 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2916 #endif
2917 }
2918 switch (size) {
2919 case 1:
2920 stb_p(qemu_get_ram_ptr(ram_addr), val);
2921 break;
2922 case 2:
2923 stw_p(qemu_get_ram_ptr(ram_addr), val);
2924 break;
2925 case 4:
2926 stl_p(qemu_get_ram_ptr(ram_addr), val);
2927 break;
2928 default:
2929 abort();
2930 }
2931 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2932 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2933 /* we remove the notdirty callback only if the code has been
2934 flushed */
2935 if (dirty_flags == 0xff)
2936 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2937 }
2938
2939 static const MemoryRegionOps notdirty_mem_ops = {
2940 .read = error_mem_read,
2941 .write = notdirty_mem_write,
2942 .endianness = DEVICE_NATIVE_ENDIAN,
2943 };
2944
2945 /* Generate a debug exception if a watchpoint has been hit. */
2946 static void check_watchpoint(int offset, int len_mask, int flags)
2947 {
2948 CPUArchState *env = cpu_single_env;
2949 target_ulong pc, cs_base;
2950 TranslationBlock *tb;
2951 target_ulong vaddr;
2952 CPUWatchpoint *wp;
2953 int cpu_flags;
2954
2955 if (env->watchpoint_hit) {
2956 /* We re-entered the check after replacing the TB. Now raise
2957 * the debug interrupt so that it will trigger after the
2958 * current instruction. */
2959 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2960 return;
2961 }
2962 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2963 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2964 if ((vaddr == (wp->vaddr & len_mask) ||
2965 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2966 wp->flags |= BP_WATCHPOINT_HIT;
2967 if (!env->watchpoint_hit) {
2968 env->watchpoint_hit = wp;
2969 tb = tb_find_pc(env->mem_io_pc);
2970 if (!tb) {
2971 cpu_abort(env, "check_watchpoint: could not find TB for "
2972 "pc=%p", (void *)env->mem_io_pc);
2973 }
2974 cpu_restore_state(tb, env, env->mem_io_pc);
2975 tb_phys_invalidate(tb, -1);
2976 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2977 env->exception_index = EXCP_DEBUG;
2978 cpu_loop_exit(env);
2979 } else {
2980 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2981 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2982 cpu_resume_from_signal(env, NULL);
2983 }
2984 }
2985 } else {
2986 wp->flags &= ~BP_WATCHPOINT_HIT;
2987 }
2988 }
2989 }
2990
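/* check_watchpoint() only fires for pages that were armed when their TLB
 * entries were built (see memory_region_section_get_iotlb() above).  As a
 * sketch of the arming side, the gdbstub does roughly the following; the
 * surrounding error handling is omitted: */
#if 0 /* illustrative only */
CPUWatchpoint *wp;

cpu_watchpoint_insert(env, addr, len, BP_MEM_WRITE | BP_GDB, &wp);
/* ... guest stores to [addr, addr+len) now trap into check_watchpoint() ... */
cpu_watchpoint_remove_by_ref(env, wp);
#endif
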
2991 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2992 so these check for a hit then pass through to the normal out-of-line
2993 phys routines. */
2994 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
2995 unsigned size)
2996 {
2997 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2998 switch (size) {
2999 case 1: return ldub_phys(addr);
3000 case 2: return lduw_phys(addr);
3001 case 4: return ldl_phys(addr);
3002 default: abort();
3003 }
3004 }
3005
3006 static void watch_mem_write(void *opaque, hwaddr addr,
3007 uint64_t val, unsigned size)
3008 {
3009 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3010 switch (size) {
3011 case 1:
3012 stb_phys(addr, val);
3013 break;
3014 case 2:
3015 stw_phys(addr, val);
3016 break;
3017 case 4:
3018 stl_phys(addr, val);
3019 break;
3020 default: abort();
3021 }
3022 }
3023
3024 static const MemoryRegionOps watch_mem_ops = {
3025 .read = watch_mem_read,
3026 .write = watch_mem_write,
3027 .endianness = DEVICE_NATIVE_ENDIAN,
3028 };
3029
3030 static uint64_t subpage_read(void *opaque, hwaddr addr,
3031 unsigned len)
3032 {
3033 subpage_t *mmio = opaque;
3034 unsigned int idx = SUBPAGE_IDX(addr);
3035 MemoryRegionSection *section;
3036 #if defined(DEBUG_SUBPAGE)
3037 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3038 mmio, len, addr, idx);
3039 #endif
3040
3041 section = &phys_sections[mmio->sub_section[idx]];
3042 addr += mmio->base;
3043 addr -= section->offset_within_address_space;
3044 addr += section->offset_within_region;
3045 return io_mem_read(section->mr, addr, len);
3046 }
3047
3048 static void subpage_write(void *opaque, hwaddr addr,
3049 uint64_t value, unsigned len)
3050 {
3051 subpage_t *mmio = opaque;
3052 unsigned int idx = SUBPAGE_IDX(addr);
3053 MemoryRegionSection *section;
3054 #if defined(DEBUG_SUBPAGE)
3055 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3056 " idx %d value %"PRIx64"\n",
3057 __func__, mmio, len, addr, idx, value);
3058 #endif
3059
3060 section = &phys_sections[mmio->sub_section[idx]];
3061 addr += mmio->base;
3062 addr -= section->offset_within_address_space;
3063 addr += section->offset_within_region;
3064 io_mem_write(section->mr, addr, value, len);
3065 }
3066
3067 static const MemoryRegionOps subpage_ops = {
3068 .read = subpage_read,
3069 .write = subpage_write,
3070 .endianness = DEVICE_NATIVE_ENDIAN,
3071 };
3072
3073 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3074 unsigned size)
3075 {
3076 ram_addr_t raddr = addr;
3077 void *ptr = qemu_get_ram_ptr(raddr);
3078 switch (size) {
3079 case 1: return ldub_p(ptr);
3080 case 2: return lduw_p(ptr);
3081 case 4: return ldl_p(ptr);
3082 default: abort();
3083 }
3084 }
3085
3086 static void subpage_ram_write(void *opaque, hwaddr addr,
3087 uint64_t value, unsigned size)
3088 {
3089 ram_addr_t raddr = addr;
3090 void *ptr = qemu_get_ram_ptr(raddr);
3091 switch (size) {
3092 case 1: return stb_p(ptr, value);
3093 case 2: return stw_p(ptr, value);
3094 case 4: return stl_p(ptr, value);
3095 default: abort();
3096 }
3097 }
3098
3099 static const MemoryRegionOps subpage_ram_ops = {
3100 .read = subpage_ram_read,
3101 .write = subpage_ram_write,
3102 .endianness = DEVICE_NATIVE_ENDIAN,
3103 };
3104
3105 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3106 uint16_t section)
3107 {
3108 int idx, eidx;
3109
3110 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3111 return -1;
3112 idx = SUBPAGE_IDX(start);
3113 eidx = SUBPAGE_IDX(end);
3114 #if defined(DEBUG_SUBPAGE)
3115 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3116 mmio, start, end, idx, eidx, memory);
3117 #endif
3118 if (memory_region_is_ram(phys_sections[section].mr)) {
3119 MemoryRegionSection new_section = phys_sections[section];
3120 new_section.mr = &io_mem_subpage_ram;
3121 section = phys_section_add(&new_section);
3122 }
3123 for (; idx <= eidx; idx++) {
3124 mmio->sub_section[idx] = section;
3125 }
3126
3127 return 0;
3128 }
3129
3130 static subpage_t *subpage_init(hwaddr base)
3131 {
3132 subpage_t *mmio;
3133
3134 mmio = g_malloc0(sizeof(subpage_t));
3135
3136 mmio->base = base;
3137 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3138 "subpage", TARGET_PAGE_SIZE);
3139 mmio->iomem.subpage = true;
3140 #if defined(DEBUG_SUBPAGE)
3141 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3142 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3143 #endif
3144 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3145
3146 return mmio;
3147 }
3148
3149 static uint16_t dummy_section(MemoryRegion *mr)
3150 {
3151 MemoryRegionSection section = {
3152 .mr = mr,
3153 .offset_within_address_space = 0,
3154 .offset_within_region = 0,
3155 .size = UINT64_MAX,
3156 };
3157
3158 return phys_section_add(&section);
3159 }
3160
3161 MemoryRegion *iotlb_to_region(hwaddr index)
3162 {
3163 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3164 }
3165
3166 static void io_mem_init(void)
3167 {
3168 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3169 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3170 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3171 "unassigned", UINT64_MAX);
3172 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3173 "notdirty", UINT64_MAX);
3174 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3175 "subpage-ram", UINT64_MAX);
3176 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3177 "watch", UINT64_MAX);
3178 }
3179
3180 static void mem_begin(MemoryListener *listener)
3181 {
3182 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3183
3184 destroy_all_mappings(d);
3185 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3186 }
3187
3188 static void core_begin(MemoryListener *listener)
3189 {
3190 phys_sections_clear();
3191 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3192 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3193 phys_section_rom = dummy_section(&io_mem_rom);
3194 phys_section_watch = dummy_section(&io_mem_watch);
3195 }
3196
3197 static void tcg_commit(MemoryListener *listener)
3198 {
3199 CPUArchState *env;
3200
3201 /* since each CPU stores ram addresses in its TLB cache, we must
3202 reset the modified entries */
3203 /* XXX: slow ! */
3204 for (env = first_cpu; env != NULL; env = env->next_cpu) {
3205 tlb_flush(env, 1);
3206 }
3207 }
3208
3209 static void core_log_global_start(MemoryListener *listener)
3210 {
3211 cpu_physical_memory_set_dirty_tracking(1);
3212 }
3213
3214 static void core_log_global_stop(MemoryListener *listener)
3215 {
3216 cpu_physical_memory_set_dirty_tracking(0);
3217 }
3218
3219 static void io_region_add(MemoryListener *listener,
3220 MemoryRegionSection *section)
3221 {
3222 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3223
3224 mrio->mr = section->mr;
3225 mrio->offset = section->offset_within_region;
3226 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3227 section->offset_within_address_space, section->size);
3228 ioport_register(&mrio->iorange);
3229 }
3230
3231 static void io_region_del(MemoryListener *listener,
3232 MemoryRegionSection *section)
3233 {
3234 isa_unassign_ioport(section->offset_within_address_space, section->size);
3235 }
3236
3237 static MemoryListener core_memory_listener = {
3238 .begin = core_begin,
3239 .log_global_start = core_log_global_start,
3240 .log_global_stop = core_log_global_stop,
3241 .priority = 1,
3242 };
3243
3244 static MemoryListener io_memory_listener = {
3245 .region_add = io_region_add,
3246 .region_del = io_region_del,
3247 .priority = 0,
3248 };
3249
3250 static MemoryListener tcg_memory_listener = {
3251 .commit = tcg_commit,
3252 };
3253
3254 void address_space_init_dispatch(AddressSpace *as)
3255 {
3256 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3257
3258 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3259 d->listener = (MemoryListener) {
3260 .begin = mem_begin,
3261 .region_add = mem_add,
3262 .region_nop = mem_add,
3263 .priority = 0,
3264 };
3265 as->dispatch = d;
3266 memory_listener_register(&d->listener, as);
3267 }
3268
3269 void address_space_destroy_dispatch(AddressSpace *as)
3270 {
3271 AddressSpaceDispatch *d = as->dispatch;
3272
3273 memory_listener_unregister(&d->listener);
3274 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3275 g_free(d);
3276 as->dispatch = NULL;
3277 }
3278
3279 static void memory_map_init(void)
3280 {
3281 system_memory = g_malloc(sizeof(*system_memory));
3282 memory_region_init(system_memory, "system", INT64_MAX);
3283 address_space_init(&address_space_memory, system_memory);
3284 address_space_memory.name = "memory";
3285
3286 system_io = g_malloc(sizeof(*system_io));
3287 memory_region_init(system_io, "io", 65536);
3288 address_space_init(&address_space_io, system_io);
3289 address_space_io.name = "I/O";
3290
3291 memory_listener_register(&core_memory_listener, &address_space_memory);
3292 memory_listener_register(&io_memory_listener, &address_space_io);
3293 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3294 }
3295
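/* With the two global address spaces wired up, callers can use the generic
 * accessors defined later in this file directly; the addresses below are
 * arbitrary examples: */
#if 0 /* illustrative only */
uint8_t buf[4];

address_space_read(&address_space_memory, 0x1000, buf, sizeof(buf));
address_space_write(&address_space_io, 0x3f8, buf, 1);
#endif
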
3296 MemoryRegion *get_system_memory(void)
3297 {
3298 return system_memory;
3299 }
3300
3301 MemoryRegion *get_system_io(void)
3302 {
3303 return system_io;
3304 }
3305
3306 #endif /* !defined(CONFIG_USER_ONLY) */
3307
3308 /* physical memory access (slow version, mainly for debug) */
3309 #if defined(CONFIG_USER_ONLY)
3310 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3311 uint8_t *buf, int len, int is_write)
3312 {
3313 int l, flags;
3314 target_ulong page;
3315 void * p;
3316
3317 while (len > 0) {
3318 page = addr & TARGET_PAGE_MASK;
3319 l = (page + TARGET_PAGE_SIZE) - addr;
3320 if (l > len)
3321 l = len;
3322 flags = page_get_flags(page);
3323 if (!(flags & PAGE_VALID))
3324 return -1;
3325 if (is_write) {
3326 if (!(flags & PAGE_WRITE))
3327 return -1;
3328 /* XXX: this code should not depend on lock_user */
3329 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3330 return -1;
3331 memcpy(p, buf, l);
3332 unlock_user(p, addr, l);
3333 } else {
3334 if (!(flags & PAGE_READ))
3335 return -1;
3336 /* XXX: this code should not depend on lock_user */
3337 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3338 return -1;
3339 memcpy(buf, p, l);
3340 unlock_user(p, addr, 0);
3341 }
3342 len -= l;
3343 buf += l;
3344 addr += l;
3345 }
3346 return 0;
3347 }
3348
3349 #else
3350
3351 static void invalidate_and_set_dirty(hwaddr addr,
3352 hwaddr length)
3353 {
3354 if (!cpu_physical_memory_is_dirty(addr)) {
3355 /* invalidate code */
3356 tb_invalidate_phys_page_range(addr, addr + length, 0);
3357 /* set dirty bit */
3358 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3359 }
3360 xen_modified_memory(addr, length);
3361 }
3362
3363 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3364 int len, bool is_write)
3365 {
3366 AddressSpaceDispatch *d = as->dispatch;
3367 int l;
3368 uint8_t *ptr;
3369 uint32_t val;
3370 hwaddr page;
3371 MemoryRegionSection *section;
3372
3373 while (len > 0) {
3374 page = addr & TARGET_PAGE_MASK;
3375 l = (page + TARGET_PAGE_SIZE) - addr;
3376 if (l > len)
3377 l = len;
3378 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3379
3380 if (is_write) {
3381 if (!memory_region_is_ram(section->mr)) {
3382 hwaddr addr1;
3383 addr1 = memory_region_section_addr(section, addr);
3384 /* XXX: could force cpu_single_env to NULL to avoid
3385 potential bugs */
3386 if (l >= 4 && ((addr1 & 3) == 0)) {
3387 /* 32 bit write access */
3388 val = ldl_p(buf);
3389 io_mem_write(section->mr, addr1, val, 4);
3390 l = 4;
3391 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3392 /* 16 bit write access */
3393 val = lduw_p(buf);
3394 io_mem_write(section->mr, addr1, val, 2);
3395 l = 2;
3396 } else {
3397 /* 8 bit write access */
3398 val = ldub_p(buf);
3399 io_mem_write(section->mr, addr1, val, 1);
3400 l = 1;
3401 }
3402 } else if (!section->readonly) {
3403 ram_addr_t addr1;
3404 addr1 = memory_region_get_ram_addr(section->mr)
3405 + memory_region_section_addr(section, addr);
3406 /* RAM case */
3407 ptr = qemu_get_ram_ptr(addr1);
3408 memcpy(ptr, buf, l);
3409 invalidate_and_set_dirty(addr1, l);
3410 qemu_put_ram_ptr(ptr);
3411 }
3412 } else {
3413 if (!(memory_region_is_ram(section->mr) ||
3414 memory_region_is_romd(section->mr))) {
3415 hwaddr addr1;
3416 /* I/O case */
3417 addr1 = memory_region_section_addr(section, addr);
3418 if (l >= 4 && ((addr1 & 3) == 0)) {
3419 /* 32 bit read access */
3420 val = io_mem_read(section->mr, addr1, 4);
3421 stl_p(buf, val);
3422 l = 4;
3423 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3424 /* 16 bit read access */
3425 val = io_mem_read(section->mr, addr1, 2);
3426 stw_p(buf, val);
3427 l = 2;
3428 } else {
3429 /* 8 bit read access */
3430 val = io_mem_read(section->mr, addr1, 1);
3431 stb_p(buf, val);
3432 l = 1;
3433 }
3434 } else {
3435 /* RAM case */
3436 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3437 + memory_region_section_addr(section,
3438 addr));
3439 memcpy(buf, ptr, l);
3440 qemu_put_ram_ptr(ptr);
3441 }
3442 }
3443 len -= l;
3444 buf += l;
3445 addr += l;
3446 }
3447 }
3448
3449 void address_space_write(AddressSpace *as, hwaddr addr,
3450 const uint8_t *buf, int len)
3451 {
3452 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3453 }
3454
3455 /**
3456 * address_space_read: read from an address space.
3457 *
3458 * @as: #AddressSpace to be accessed
3459 * @addr: address within that address space
3460 * @buf: buffer with the data transferred
3461 * @len: length of the transfer in bytes */
3462 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3463 {
3464 address_space_rw(as, addr, buf, len, false);
3465 }
3466
3467
3468 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3469 int len, int is_write)
3470 {
3471 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
3472 }
3473
3474 /* used for ROM loading : can write in RAM and ROM */
3475 void cpu_physical_memory_write_rom(hwaddr addr,
3476 const uint8_t *buf, int len)
3477 {
3478 AddressSpaceDispatch *d = address_space_memory.dispatch;
3479 int l;
3480 uint8_t *ptr;
3481 hwaddr page;
3482 MemoryRegionSection *section;
3483
3484 while (len > 0) {
3485 page = addr & TARGET_PAGE_MASK;
3486 l = (page + TARGET_PAGE_SIZE) - addr;
3487 if (l > len)
3488 l = len;
3489 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3490
3491 if (!(memory_region_is_ram(section->mr) ||
3492 memory_region_is_romd(section->mr))) {
3493 /* do nothing */
3494 } else {
3495 unsigned long addr1;
3496 addr1 = memory_region_get_ram_addr(section->mr)
3497 + memory_region_section_addr(section, addr);
3498 /* ROM/RAM case */
3499 ptr = qemu_get_ram_ptr(addr1);
3500 memcpy(ptr, buf, l);
3501 invalidate_and_set_dirty(addr1, l);
3502 qemu_put_ram_ptr(ptr);
3503 }
3504 len -= l;
3505 buf += l;
3506 addr += l;
3507 }
3508 }
3509
3510 typedef struct {
3511 void *buffer;
3512 hwaddr addr;
3513 hwaddr len;
3514 } BounceBuffer;
3515
3516 static BounceBuffer bounce;
3517
3518 typedef struct MapClient {
3519 void *opaque;
3520 void (*callback)(void *opaque);
3521 QLIST_ENTRY(MapClient) link;
3522 } MapClient;
3523
3524 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3525 = QLIST_HEAD_INITIALIZER(map_client_list);
3526
3527 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3528 {
3529 MapClient *client = g_malloc(sizeof(*client));
3530
3531 client->opaque = opaque;
3532 client->callback = callback;
3533 QLIST_INSERT_HEAD(&map_client_list, client, link);
3534 return client;
3535 }
3536
3537 void cpu_unregister_map_client(void *_client)
3538 {
3539 MapClient *client = (MapClient *)_client;
3540
3541 QLIST_REMOVE(client, link);
3542 g_free(client);
3543 }
3544
3545 static void cpu_notify_map_clients(void)
3546 {
3547 MapClient *client;
3548
3549 while (!QLIST_EMPTY(&map_client_list)) {
3550 client = QLIST_FIRST(&map_client_list);
3551 client->callback(client->opaque);
3552 cpu_unregister_map_client(client);
3553 }
3554 }
3555
3556 /* Map a physical memory region into a host virtual address.
3557 * May map a subset of the requested range, given by and returned in *plen.
3558 * May return NULL if resources needed to perform the mapping are exhausted.
3559 * Use only for reads OR writes - not for read-modify-write operations.
3560 * Use cpu_register_map_client() to know when retrying the map operation is
3561 * likely to succeed.
3562 */
3563 void *address_space_map(AddressSpace *as,
3564 hwaddr addr,
3565 hwaddr *plen,
3566 bool is_write)
3567 {
3568 AddressSpaceDispatch *d = as->dispatch;
3569 hwaddr len = *plen;
3570 hwaddr todo = 0;
3571 int l;
3572 hwaddr page;
3573 MemoryRegionSection *section;
3574 ram_addr_t raddr = RAM_ADDR_MAX;
3575 ram_addr_t rlen;
3576 void *ret;
3577
3578 while (len > 0) {
3579 page = addr & TARGET_PAGE_MASK;
3580 l = (page + TARGET_PAGE_SIZE) - addr;
3581 if (l > len)
3582 l = len;
3583 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3584
3585 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3586 if (todo || bounce.buffer) {
3587 break;
3588 }
3589 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3590 bounce.addr = addr;
3591 bounce.len = l;
3592 if (!is_write) {
3593 address_space_read(as, addr, bounce.buffer, l);
3594 }
3595
3596 *plen = l;
3597 return bounce.buffer;
3598 }
3599 if (!todo) {
3600 raddr = memory_region_get_ram_addr(section->mr)
3601 + memory_region_section_addr(section, addr);
3602 }
3603
3604 len -= l;
3605 addr += l;
3606 todo += l;
3607 }
3608 rlen = todo;
3609 ret = qemu_ram_ptr_length(raddr, &rlen);
3610 *plen = rlen;
3611 return ret;
3612 }
3613
3614 /* Unmaps a memory region previously mapped by address_space_map().
3615 * Will also mark the memory as dirty if is_write == 1. access_len gives
3616 * the amount of memory that was actually read or written by the caller.
3617 */
3618 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3619 int is_write, hwaddr access_len)
3620 {
3621 if (buffer != bounce.buffer) {
3622 if (is_write) {
3623 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3624 while (access_len) {
3625 unsigned l;
3626 l = TARGET_PAGE_SIZE;
3627 if (l > access_len)
3628 l = access_len;
3629 invalidate_and_set_dirty(addr1, l);
3630 addr1 += l;
3631 access_len -= l;
3632 }
3633 }
3634 if (xen_enabled()) {
3635 xen_invalidate_map_cache_entry(buffer);
3636 }
3637 return;
3638 }
3639 if (is_write) {
3640 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3641 }
3642 qemu_vfree(bounce.buffer);
3643 bounce.buffer = NULL;
3644 cpu_notify_map_clients();
3645 }
3646
3647 void *cpu_physical_memory_map(hwaddr addr,
3648 hwaddr *plen,
3649 int is_write)
3650 {
3651 return address_space_map(&address_space_memory, addr, plen, is_write);
3652 }
3653
3654 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3655 int is_write, hwaddr access_len)
3656 {
3657 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3658 }
3659
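/* A sketch of the zero-copy DMA pattern built on the map/unmap pair above;
 * dma_addr, dma_len, opaque and retry_cb are hypothetical device state: */
#if 0 /* illustrative only */
hwaddr len = dma_len;
void *buf = cpu_physical_memory_map(dma_addr, &len, 1 /* is_write */);

if (!buf) {
    /* bounce buffer busy: ask to be notified when a retry may succeed */
    cpu_register_map_client(opaque, retry_cb);
    return;
}
/* ... fill buf[0..len); note len may be smaller than dma_len ... */
cpu_physical_memory_unmap(buf, len, 1 /* is_write */, len);
#endif
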
3660 /* warning: addr must be aligned */
3661 static inline uint32_t ldl_phys_internal(hwaddr addr,
3662 enum device_endian endian)
3663 {
3664 uint8_t *ptr;
3665 uint32_t val;
3666 MemoryRegionSection *section;
3667
3668 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3669
3670 if (!(memory_region_is_ram(section->mr) ||
3671 memory_region_is_romd(section->mr))) {
3672 /* I/O case */
3673 addr = memory_region_section_addr(section, addr);
3674 val = io_mem_read(section->mr, addr, 4);
3675 #if defined(TARGET_WORDS_BIGENDIAN)
3676 if (endian == DEVICE_LITTLE_ENDIAN) {
3677 val = bswap32(val);
3678 }
3679 #else
3680 if (endian == DEVICE_BIG_ENDIAN) {
3681 val = bswap32(val);
3682 }
3683 #endif
3684 } else {
3685 /* RAM case */
3686 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3687 & TARGET_PAGE_MASK)
3688 + memory_region_section_addr(section, addr));
3689 switch (endian) {
3690 case DEVICE_LITTLE_ENDIAN:
3691 val = ldl_le_p(ptr);
3692 break;
3693 case DEVICE_BIG_ENDIAN:
3694 val = ldl_be_p(ptr);
3695 break;
3696 default:
3697 val = ldl_p(ptr);
3698 break;
3699 }
3700 }
3701 return val;
3702 }
3703
3704 uint32_t ldl_phys(hwaddr addr)
3705 {
3706 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3707 }
3708
3709 uint32_t ldl_le_phys(hwaddr addr)
3710 {
3711 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3712 }
3713
3714 uint32_t ldl_be_phys(hwaddr addr)
3715 {
3716 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3717 }
3718
3719 /* warning: addr must be aligned */
3720 static inline uint64_t ldq_phys_internal(hwaddr addr,
3721 enum device_endian endian)
3722 {
3723 uint8_t *ptr;
3724 uint64_t val;
3725 MemoryRegionSection *section;
3726
3727 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3728
3729 if (!(memory_region_is_ram(section->mr) ||
3730 memory_region_is_romd(section->mr))) {
3731 /* I/O case */
3732 addr = memory_region_section_addr(section, addr);
3733
3734 /* XXX This is broken when device endian != cpu endian.
3735 Fix and add "endian" variable check */
3736 #ifdef TARGET_WORDS_BIGENDIAN
3737 val = io_mem_read(section->mr, addr, 4) << 32;
3738 val |= io_mem_read(section->mr, addr + 4, 4);
3739 #else
3740 val = io_mem_read(section->mr, addr, 4);
3741 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3742 #endif
3743 } else {
3744 /* RAM case */
3745 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3746 & TARGET_PAGE_MASK)
3747 + memory_region_section_addr(section, addr));
3748 switch (endian) {
3749 case DEVICE_LITTLE_ENDIAN:
3750 val = ldq_le_p(ptr);
3751 break;
3752 case DEVICE_BIG_ENDIAN:
3753 val = ldq_be_p(ptr);
3754 break;
3755 default:
3756 val = ldq_p(ptr);
3757 break;
3758 }
3759 }
3760 return val;
3761 }
3762
3763 uint64_t ldq_phys(hwaddr addr)
3764 {
3765 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3766 }
3767
3768 uint64_t ldq_le_phys(hwaddr addr)
3769 {
3770 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3771 }
3772
3773 uint64_t ldq_be_phys(hwaddr addr)
3774 {
3775 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3776 }
3777
3778 /* XXX: optimize */
3779 uint32_t ldub_phys(hwaddr addr)
3780 {
3781 uint8_t val;
3782 cpu_physical_memory_read(addr, &val, 1);
3783 return val;
3784 }
3785
3786 /* warning: addr must be aligned */
3787 static inline uint32_t lduw_phys_internal(hwaddr addr,
3788 enum device_endian endian)
3789 {
3790 uint8_t *ptr;
3791 uint64_t val;
3792 MemoryRegionSection *section;
3793
3794 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3795
3796 if (!(memory_region_is_ram(section->mr) ||
3797 memory_region_is_romd(section->mr))) {
3798 /* I/O case */
3799 addr = memory_region_section_addr(section, addr);
3800 val = io_mem_read(section->mr, addr, 2);
3801 #if defined(TARGET_WORDS_BIGENDIAN)
3802 if (endian == DEVICE_LITTLE_ENDIAN) {
3803 val = bswap16(val);
3804 }
3805 #else
3806 if (endian == DEVICE_BIG_ENDIAN) {
3807 val = bswap16(val);
3808 }
3809 #endif
3810 } else {
3811 /* RAM case */
3812 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3813 & TARGET_PAGE_MASK)
3814 + memory_region_section_addr(section, addr));
3815 switch (endian) {
3816 case DEVICE_LITTLE_ENDIAN:
3817 val = lduw_le_p(ptr);
3818 break;
3819 case DEVICE_BIG_ENDIAN:
3820 val = lduw_be_p(ptr);
3821 break;
3822 default:
3823 val = lduw_p(ptr);
3824 break;
3825 }
3826 }
3827 return val;
3828 }
3829
3830 uint32_t lduw_phys(hwaddr addr)
3831 {
3832 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3833 }
3834
3835 uint32_t lduw_le_phys(hwaddr addr)
3836 {
3837 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3838 }
3839
3840 uint32_t lduw_be_phys(hwaddr addr)
3841 {
3842 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3843 }
3844
3845 /* warning: addr must be aligned. The RAM page is not marked as dirty
3846 and the code inside is not invalidated. This is useful if the dirty
3847 bits are used to track modified PTEs. */
3848 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3849 {
3850 uint8_t *ptr;
3851 MemoryRegionSection *section;
3852
3853 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3854
3855 if (!memory_region_is_ram(section->mr) || section->readonly) {
3856 addr = memory_region_section_addr(section, addr);
3857 if (memory_region_is_ram(section->mr)) {
3858 section = &phys_sections[phys_section_rom];
3859 }
3860 io_mem_write(section->mr, addr, val, 4);
3861 } else {
3862 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3863 & TARGET_PAGE_MASK)
3864 + memory_region_section_addr(section, addr);
3865 ptr = qemu_get_ram_ptr(addr1);
3866 stl_p(ptr, val);
3867
3868 if (unlikely(in_migration)) {
3869 if (!cpu_physical_memory_is_dirty(addr1)) {
3870 /* invalidate code */
3871 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3872 /* set dirty bit */
3873 cpu_physical_memory_set_dirty_flags(
3874 addr1, (0xff & ~CODE_DIRTY_FLAG));
3875 }
3876 }
3877 }
3878 }
3879
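/* The typical caller is a target page-table walker updating accessed/dirty
 * bits in a guest PTE: going through stl_phys() there would needlessly mark
 * the page dirty and invalidate translated code.  An x86-flavoured sketch
 * (pte, pte_addr and PG_ACCESSED_MASK live in the target code, not here): */
#if 0 /* illustrative only */
pte = ldl_phys(pte_addr);
if (!(pte & PG_ACCESSED_MASK)) {
    pte |= PG_ACCESSED_MASK;
    stl_phys_notdirty(pte_addr, pte);
}
#endif
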
3880 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3881 {
3882 uint8_t *ptr;
3883 MemoryRegionSection *section;
3884
3885 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3886
3887 if (!memory_region_is_ram(section->mr) || section->readonly) {
3888 addr = memory_region_section_addr(section, addr);
3889 if (memory_region_is_ram(section->mr)) {
3890 section = &phys_sections[phys_section_rom];
3891 }
3892 #ifdef TARGET_WORDS_BIGENDIAN
3893 io_mem_write(section->mr, addr, val >> 32, 4);
3894 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3895 #else
3896 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3897 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3898 #endif
3899 } else {
3900 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3901 & TARGET_PAGE_MASK)
3902 + memory_region_section_addr(section, addr));
3903 stq_p(ptr, val);
3904 }
3905 }
3906
3907 /* warning: addr must be aligned */
3908 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3909 enum device_endian endian)
3910 {
3911 uint8_t *ptr;
3912 MemoryRegionSection *section;
3913
3914 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3915
3916 if (!memory_region_is_ram(section->mr) || section->readonly) {
3917 addr = memory_region_section_addr(section, addr);
3918 if (memory_region_is_ram(section->mr)) {
3919 section = &phys_sections[phys_section_rom];
3920 }
3921 #if defined(TARGET_WORDS_BIGENDIAN)
3922 if (endian == DEVICE_LITTLE_ENDIAN) {
3923 val = bswap32(val);
3924 }
3925 #else
3926 if (endian == DEVICE_BIG_ENDIAN) {
3927 val = bswap32(val);
3928 }
3929 #endif
3930 io_mem_write(section->mr, addr, val, 4);
3931 } else {
3932 unsigned long addr1;
3933 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3934 + memory_region_section_addr(section, addr);
3935 /* RAM case */
3936 ptr = qemu_get_ram_ptr(addr1);
3937 switch (endian) {
3938 case DEVICE_LITTLE_ENDIAN:
3939 stl_le_p(ptr, val);
3940 break;
3941 case DEVICE_BIG_ENDIAN:
3942 stl_be_p(ptr, val);
3943 break;
3944 default:
3945 stl_p(ptr, val);
3946 break;
3947 }
3948 invalidate_and_set_dirty(addr1, 4);
3949 }
3950 }
3951
3952 void stl_phys(hwaddr addr, uint32_t val)
3953 {
3954 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3955 }
3956
3957 void stl_le_phys(hwaddr addr, uint32_t val)
3958 {
3959 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3960 }
3961
3962 void stl_be_phys(hwaddr addr, uint32_t val)
3963 {
3964 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3965 }
3966
3967 /* XXX: optimize */
3968 void stb_phys(hwaddr addr, uint32_t val)
3969 {
3970 uint8_t v = val;
3971 cpu_physical_memory_write(addr, &v, 1);
3972 }
3973
3974 /* warning: addr must be aligned */
3975 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3976 enum device_endian endian)
3977 {
3978 uint8_t *ptr;
3979 MemoryRegionSection *section;
3980
3981 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3982
3983 if (!memory_region_is_ram(section->mr) || section->readonly) {
3984 addr = memory_region_section_addr(section, addr);
3985 if (memory_region_is_ram(section->mr)) {
3986 section = &phys_sections[phys_section_rom];
3987 }
3988 #if defined(TARGET_WORDS_BIGENDIAN)
3989 if (endian == DEVICE_LITTLE_ENDIAN) {
3990 val = bswap16(val);
3991 }
3992 #else
3993 if (endian == DEVICE_BIG_ENDIAN) {
3994 val = bswap16(val);
3995 }
3996 #endif
3997 io_mem_write(section->mr, addr, val, 2);
3998 } else {
3999 unsigned long addr1;
4000 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4001 + memory_region_section_addr(section, addr);
4002 /* RAM case */
4003 ptr = qemu_get_ram_ptr(addr1);
4004 switch (endian) {
4005 case DEVICE_LITTLE_ENDIAN:
4006 stw_le_p(ptr, val);
4007 break;
4008 case DEVICE_BIG_ENDIAN:
4009 stw_be_p(ptr, val);
4010 break;
4011 default:
4012 stw_p(ptr, val);
4013 break;
4014 }
4015 invalidate_and_set_dirty(addr1, 2);
4016 }
4017 }
4018
4019 void stw_phys(hwaddr addr, uint32_t val)
4020 {
4021 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4022 }
4023
4024 void stw_le_phys(hwaddr addr, uint32_t val)
4025 {
4026 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4027 }
4028
4029 void stw_be_phys(hwaddr addr, uint32_t val)
4030 {
4031 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4032 }
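
/*
 * Illustrative usage, not part of the original source: the 16-bit helpers
 * mirror the 32-bit family above.  A hypothetical NIC model publishing a
 * little-endian ring index could call:
 *
 *     stw_le_phys(ring_addr + IDX_OFFSET, next_idx);
 *
 * ring_addr, IDX_OFFSET and next_idx are placeholder names.
 */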
4033
4034 /* XXX: optimize */
4035 void stq_phys(hwaddr addr, uint64_t val)
4036 {
4037 val = tswap64(val);
4038 cpu_physical_memory_write(addr, &val, 8);
4039 }
4040
4041 void stq_le_phys(hwaddr addr, uint64_t val)
4042 {
4043 val = cpu_to_le64(val);
4044 cpu_physical_memory_write(addr, &val, 8);
4045 }
4046
4047 void stq_be_phys(hwaddr addr, uint64_t val)
4048 {
4049 val = cpu_to_be64(val);
4050 cpu_physical_memory_write(addr, &val, 8);
4051 }
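
/*
 * Added commentary: unlike stl/stw above, the 64-bit stores convert the
 * value to the requested byte order in a local variable and then go through
 * the generic cpu_physical_memory_write() path, so an 8-byte access to an
 * MMIO region may be split into smaller transactions by that function
 * rather than issued as a single 64-bit write.
 */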
4052
4053 /* virtual memory access for debug (includes writing to ROM) */
4054 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4055 uint8_t *buf, int len, int is_write)
4056 {
4057 int l;
4058 hwaddr phys_addr;
4059 target_ulong page;
4060
4061 while (len > 0) {
4062 page = addr & TARGET_PAGE_MASK;
4063 phys_addr = cpu_get_phys_page_debug(env, page);
4064 /* if no physical page mapped, return an error */
4065 if (phys_addr == -1)
4066 return -1;
4067 l = (page + TARGET_PAGE_SIZE) - addr;
4068 if (l > len)
4069 l = len;
4070 phys_addr += (addr & ~TARGET_PAGE_MASK);
4071 if (is_write)
4072 cpu_physical_memory_write_rom(phys_addr, buf, l);
4073 else
4074 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4075 len -= l;
4076 buf += l;
4077 addr += l;
4078 }
4079 return 0;
4080 }
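
/*
 * Illustrative usage, not part of the original source: this is the routine
 * a debugger front end such as the gdb stub uses to access guest virtual
 * memory, for example to fetch an instruction word:
 *
 *     uint32_t insn;
 *     int err = cpu_memory_rw_debug(env, pc, (uint8_t *)&insn,
 *                                   sizeof(insn), 0);
 *
 * pc and insn are placeholder names; a return value of -1 means no physical
 * page is mapped at that virtual address.  Writes go through
 * cpu_physical_memory_write_rom() so software breakpoints can be planted
 * even in ROM-backed regions.
 */
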
4081 #endif
4082
4083 /* In deterministic execution (icount) mode, an instruction that performs
4084 device I/O must be the last one in its TB. */
4085 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4086 {
4087 TranslationBlock *tb;
4088 uint32_t n, cflags;
4089 target_ulong pc, cs_base;
4090 uint64_t flags;
4091
4092 tb = tb_find_pc(retaddr);
4093 if (!tb) {
4094 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4095 (void *)retaddr);
4096 }
4097 n = env->icount_decr.u16.low + tb->icount;
4098 cpu_restore_state(tb, env, retaddr);
4099 /* Calculate how many instructions had been executed before the fault
4100 occurred. */
4101 n = n - env->icount_decr.u16.low;
4102 /* Generate a new TB ending on the I/O insn. */
4103 n++;
4104 /* On MIPS and SH, delay slot instructions can only be restarted if
4105 they were already the first instruction in the TB. If this is not
4106 the first instruction in a TB then re-execute the preceding
4107 branch. */
4108 #if defined(TARGET_MIPS)
4109 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4110 env->active_tc.PC -= 4;
4111 env->icount_decr.u16.low++;
4112 env->hflags &= ~MIPS_HFLAG_BMASK;
4113 }
4114 #elif defined(TARGET_SH4)
4115 if ((env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) != 0
4116 && n > 1) {
4117 env->pc -= 2;
4118 env->icount_decr.u16.low++;
4119 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4120 }
4121 #endif
4122 /* This should never happen: n must fit in CF_COUNT_MASK. */
4123 if (n > CF_COUNT_MASK)
4124 cpu_abort(env, "TB too big during recompile");
4125
4126 cflags = n | CF_LAST_IO;
4127 pc = tb->pc;
4128 cs_base = tb->cs_base;
4129 flags = tb->flags;
4130 tb_phys_invalidate(tb, -1);
4131 /* FIXME: In theory this could raise an exception. In practice
4132 we have already translated the block once so it's probably ok. */
4133 tb_gen_code(env, pc, cs_base, flags, cflags);
4134 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4135 the first in the TB) then we end up generating a whole new TB and
4136 repeating the fault, which is horribly inefficient.
4137 Better would be to execute just this insn uncached, or generate a
4138 second new TB. */
4139 cpu_resume_from_signal(env, NULL);
4140 }
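
/*
 * Added commentary: cpu_io_recompile() is reached from the softmmu
 * load/store helpers when, in icount mode, an instruction performing device
 * I/O turns out not to be the last one in its TB.  The TB is invalidated
 * and regenerated with CF_LAST_IO and an instruction count that ends on the
 * I/O insn, then execution restarts through cpu_resume_from_signal().  The
 * retaddr argument is the host return address inside the generated code,
 * which tb_find_pc() uses to locate the offending TB.
 */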
4141
4142 #if !defined(CONFIG_USER_ONLY)
4143
4144 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4145 {
4146 int i, target_code_size, max_target_code_size;
4147 int direct_jmp_count, direct_jmp2_count, cross_page;
4148 TranslationBlock *tb;
4149
4150 target_code_size = 0;
4151 max_target_code_size = 0;
4152 cross_page = 0;
4153 direct_jmp_count = 0;
4154 direct_jmp2_count = 0;
4155 for (i = 0; i < nb_tbs; i++) {
4156 tb = &tbs[i];
4157 target_code_size += tb->size;
4158 if (tb->size > max_target_code_size)
4159 max_target_code_size = tb->size;
4160 if (tb->page_addr[1] != -1)
4161 cross_page++;
4162 if (tb->tb_next_offset[0] != 0xffff) {
4163 direct_jmp_count++;
4164 if (tb->tb_next_offset[1] != 0xffff) {
4165 direct_jmp2_count++;
4166 }
4167 }
4168 }
4169 /* XXX: avoid using doubles ? */
4170 cpu_fprintf(f, "Translation buffer state:\n");
4171 cpu_fprintf(f, "gen code size %td/%zu\n",
4172 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4173 cpu_fprintf(f, "TB count %d/%d\n",
4174 nb_tbs, code_gen_max_blocks);
4175 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4176 nb_tbs ? target_code_size / nb_tbs : 0,
4177 max_target_code_size);
4178 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4179 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4180 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4181 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4182 cross_page,
4183 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4184 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4185 direct_jmp_count,
4186 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4187 direct_jmp2_count,
4188 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4189 cpu_fprintf(f, "\nStatistics:\n");
4190 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4191 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4192 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4193 tcg_dump_info(f, cpu_fprintf);
4194 }
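
/*
 * Added note, not part of the original source: these statistics back the
 * monitor's "info jit" command.  As a rough worked example, a guest that
 * has translated 10000 TBs totalling 400 KB of target code into 1.2 MB of
 * host code would show an expansion ratio of about 3.0.
 */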
4195
4196 /*
4197 * A helper function for the _utterly broken_ virtio device model to find out if
4198 * it's running on a big endian machine. Don't do this at home kids!
4199 */
4200 bool virtio_is_big_endian(void);
4201 bool virtio_is_big_endian(void)
4202 {
4203 #if defined(TARGET_WORDS_BIGENDIAN)
4204 return true;
4205 #else
4206 return false;
4207 #endif
4208 }
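
/*
 * Added commentary: the answer is fixed at build time by
 * TARGET_WORDS_BIGENDIAN, so it reflects the target's default byte order
 * only and cannot follow a guest that switches endianness at run time.
 */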
4209
4210 #endif
4211
4212 #ifndef CONFIG_USER_ONLY
4213 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4214 {
4215 MemoryRegionSection *section;
4216
4217 section = phys_page_find(address_space_memory.dispatch,
4218 phys_addr >> TARGET_PAGE_BITS);
4219
4220 return !(memory_region_is_ram(section->mr) ||
4221 memory_region_is_romd(section->mr));
4222 }
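
/*
 * Illustrative usage, not part of the original source: callers that must
 * not touch device registers, such as code dumping or scanning guest
 * memory, can use this predicate to skip MMIO-backed pages:
 *
 *     if (!cpu_physical_memory_is_io(paddr)) {
 *         cpu_physical_memory_read(paddr, buf, TARGET_PAGE_SIZE);
 *     }
 *
 * paddr and buf are placeholder names.  ROM and ROM-device (romd) regions
 * count as memory here, not I/O.
 */
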
4223 #endif