]> git.proxmox.com Git - mirror_qemu.git/blob - exec.c
Rename target_phys_addr_t to hwaddr
[mirror_qemu.git] / exec.c
1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
59
60 #include "cputlb.h"
61
62 #include "memory-internal.h"
63
64 //#define DEBUG_TB_INVALIDATE
65 //#define DEBUG_FLUSH
66 //#define DEBUG_UNASSIGNED
67
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
73
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
78
79 #define SMC_BITMAP_USE_THRESHOLD 10
80
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
87
88 uint8_t *code_gen_prologue;
89 static uint8_t *code_gen_buffer;
90 static size_t code_gen_buffer_size;
91 /* threshold to flush the translated code buffer */
92 static size_t code_gen_buffer_max_size;
93 static uint8_t *code_gen_ptr;
94
95 #if !defined(CONFIG_USER_ONLY)
96 int phys_ram_fd;
97 static int in_migration;
98
99 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
100
101 static MemoryRegion *system_memory;
102 static MemoryRegion *system_io;
103
104 AddressSpace address_space_io;
105 AddressSpace address_space_memory;
106
107 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
108 static MemoryRegion io_mem_subpage_ram;
109
110 #endif
111
112 CPUArchState *first_cpu;
113 /* current CPU in the current thread. It is only valid inside
114 cpu_exec() */
115 DEFINE_TLS(CPUArchState *,cpu_single_env);
116 /* 0 = Do not count executed instructions.
117 1 = Precise instruction counting.
118 2 = Adaptive rate instruction counting. */
119 int use_icount = 0;
120
121 typedef struct PageDesc {
122 /* list of TBs intersecting this ram page */
123 TranslationBlock *first_tb;
124 /* in order to optimize self modifying code, we count the number
125 of lookups we do to a given page to use a bitmap */
126 unsigned int code_write_count;
127 uint8_t *code_bitmap;
128 #if defined(CONFIG_USER_ONLY)
129 unsigned long flags;
130 #endif
131 } PageDesc;
132
133 /* In system mode we want L1_MAP to be based on ram offsets,
134 while in user mode we want it to be based on virtual addresses. */
135 #if !defined(CONFIG_USER_ONLY)
136 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
137 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
138 #else
139 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
140 #endif
141 #else
142 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
143 #endif
144
145 /* Size of the L2 (and L3, etc) page tables. */
146 #define L2_BITS 10
147 #define L2_SIZE (1 << L2_BITS)
148
149 #define P_L2_LEVELS \
150 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
151
152 /* The bits remaining after N lower levels of page tables. */
153 #define V_L1_BITS_REM \
154 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
155
156 #if V_L1_BITS_REM < 4
157 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
158 #else
159 #define V_L1_BITS V_L1_BITS_REM
160 #endif
161
162 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
163
164 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
165
166 uintptr_t qemu_real_host_page_size;
167 uintptr_t qemu_host_page_size;
168 uintptr_t qemu_host_page_mask;
169
170 /* This is a multi-level map on the virtual address space.
171 The bottom level has pointers to PageDesc. */
172 static void *l1_map[V_L1_SIZE];
173
174 #if !defined(CONFIG_USER_ONLY)
175
176 static MemoryRegionSection *phys_sections;
177 static unsigned phys_sections_nb, phys_sections_nb_alloc;
178 static uint16_t phys_section_unassigned;
179 static uint16_t phys_section_notdirty;
180 static uint16_t phys_section_rom;
181 static uint16_t phys_section_watch;
182
183 /* Simple allocator for PhysPageEntry nodes */
184 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
185 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
186
187 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
188
189 static void io_mem_init(void);
190 static void memory_map_init(void);
191
192 static MemoryRegion io_mem_watch;
193 #endif
194
195 /* statistics */
196 static int tb_flush_count;
197 static int tb_phys_invalidate_count;
198
199 #ifdef _WIN32
/* Make the host memory range [addr, addr + size) executable.
   Windows variant: VirtualProtect rounds to page granularity itself. */
static inline void map_exec(void *addr, long size)
{
    DWORD old_protect;
    VirtualProtect(addr, size,
                   PAGE_EXECUTE_READWRITE, &old_protect);

}
207 #else
/* Make the host memory range [addr, addr + size) executable, widening
   the range to full host pages as mprotect() requires. */
static inline void map_exec(void *addr, long size)
{
    unsigned long page_size = sysconf(_SC_PAGESIZE);
    unsigned long first = (unsigned long)addr & ~(page_size - 1);
    unsigned long last = ((unsigned long)addr + size + page_size - 1)
                         & ~(page_size - 1);

    /* NOTE(review): the mprotect() result is deliberately ignored, as
       in the original; failure leaves the range non-executable. */
    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
223 #endif
224
/* Determine the host page size/mask and, for BSD user-mode emulation,
   mark every range already mapped in the host as PAGE_RESERVED so the
   guest image cannot be placed on top of it. */
static void page_init(void)
{
    /* NOTE: we can always suppose that qemu_host_page_size >=
       TARGET_PAGE_SIZE */
#ifdef _WIN32
    {
        SYSTEM_INFO system_info;

        GetSystemInfo(&system_info);
        qemu_real_host_page_size = system_info.dwPageSize;
    }
#else
    qemu_real_host_page_size = getpagesize();
#endif
    /* qemu_host_page_size may already have been set elsewhere; only
       supply a default when it is still zero. */
    if (qemu_host_page_size == 0)
        qemu_host_page_size = qemu_real_host_page_size;
    /* The emulated page granularity can never be finer than the
       target's own page size. */
    if (qemu_host_page_size < TARGET_PAGE_SIZE)
        qemu_host_page_size = TARGET_PAGE_SIZE;
    qemu_host_page_mask = ~(qemu_host_page_size - 1);

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        /* Query the kernel for this process's memory map. */
        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        /* Mapping extends beyond the guest address
                           space: reserve everything up to the top. */
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        /* No kinfo_getvmmap(): fall back to parsing the Linux
           compatibility maps file. */
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            do {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}
310
311 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
312 {
313 PageDesc *pd;
314 void **lp;
315 int i;
316
317 #if defined(CONFIG_USER_ONLY)
318 /* We can't use g_malloc because it may recurse into a locked mutex. */
319 # define ALLOC(P, SIZE) \
320 do { \
321 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
322 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
323 } while (0)
324 #else
325 # define ALLOC(P, SIZE) \
326 do { P = g_malloc0(SIZE); } while (0)
327 #endif
328
329 /* Level 1. Always allocated. */
330 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
331
332 /* Level 2..N-1. */
333 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
334 void **p = *lp;
335
336 if (p == NULL) {
337 if (!alloc) {
338 return NULL;
339 }
340 ALLOC(p, sizeof(void *) * L2_SIZE);
341 *lp = p;
342 }
343
344 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
345 }
346
347 pd = *lp;
348 if (pd == NULL) {
349 if (!alloc) {
350 return NULL;
351 }
352 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
353 *lp = pd;
354 }
355
356 #undef ALLOC
357
358 return pd + (index & (L2_SIZE - 1));
359 }
360
/* Non-allocating lookup: return the PageDesc for page 'index', or
   NULL if no descriptor has been created yet. */
static inline PageDesc *page_find(tb_page_addr_t index)
{
    return page_find_alloc(index, 0);
}
365
366 #if !defined(CONFIG_USER_ONLY)
367
/* Ensure the PhysPageEntry node pool has room for 'nodes' more nodes,
   growing the backing array geometrically when needed. */
static void phys_map_node_reserve(unsigned nodes)
{
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
        typedef PhysPageEntry Node[L2_SIZE];
        /* Double the pool (at least 16 nodes), then make sure that is
           actually enough to satisfy this request. */
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
                                      phys_map_nodes_nb + nodes);
        phys_map_nodes = g_renew(Node, phys_map_nodes,
                                 phys_map_nodes_nb_alloc);
    }
}
379
/* Take the next free node from the pool (the caller must have called
   phys_map_node_reserve()) and initialise every entry to "no mapping".
   Returns the node's index, used as a compact pointer. */
static uint16_t phys_map_node_alloc(void)
{
    unsigned i;
    uint16_t ret;

    ret = phys_map_nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != phys_map_nodes_nb_alloc);
    for (i = 0; i < L2_SIZE; ++i) {
        phys_map_nodes[ret][i].is_leaf = 0;
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}
394
/* Discard all allocated nodes; the backing storage is kept for reuse. */
static void phys_map_nodes_reset(void)
{
    phys_map_nodes_nb = 0;
}
399
400
/* Recursive worker for phys_page_set(): map the page range (*index,
   *nb) -- both in target-page units -- to section number 'leaf',
   creating intermediate nodes as needed.  Level 0 is the leaf level;
   *index and *nb are advanced as pages are consumed. */
static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
                                hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    /* Number of pages covered by one entry at this level. */
    hwaddr step = (hwaddr)1 << (level * L2_BITS);

    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc();
        p = phys_map_nodes[lp->ptr];
        if (level == 0) {
            /* Fresh leaf node: start out with every page unassigned. */
            for (i = 0; i < L2_SIZE; i++) {
                p[i].is_leaf = 1;
                p[i].ptr = phys_section_unassigned;
            }
        }
    } else {
        p = phys_map_nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];

    while (*nb && lp < &p[L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            /* The range covers this entry's whole subtree: store a
               large leaf here instead of recursing further. */
            lp->is_leaf = true;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}
435
/* Point 'nb' target pages starting at page 'index' of dispatch 'd' at
   the memory region section numbered 'leaf'. */
static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(3 * P_L2_LEVELS);

    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}
445
/* Look up the MemoryRegionSection covering target page 'index' in
   dispatch 'd'.  Returns the unassigned section when nothing is
   mapped there -- never NULL. */
MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
{
    PhysPageEntry lp = d->phys_map;
    PhysPageEntry *p;
    int i;
    uint16_t s_index = phys_section_unassigned;

    /* Walk down the radix tree; large leaves stop the walk early. */
    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            goto not_found;
        }
        p = phys_map_nodes[lp.ptr];
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
    }

    s_index = lp.ptr;
not_found:
    return &phys_sections[s_index];
}
465
466 bool memory_region_is_unassigned(MemoryRegion *mr)
467 {
468 return mr != &io_mem_ram && mr != &io_mem_rom
469 && mr != &io_mem_notdirty && !mr->rom_device
470 && mr != &io_mem_watch;
471 }
472
473 #define mmap_lock() do { } while(0)
474 #define mmap_unlock() do { } while(0)
475 #endif
476
477 #if defined(CONFIG_USER_ONLY)
478 /* Currently it is not recommended to allocate big chunks of data in
479 user mode. It will change when a dedicated libc will be used. */
480 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
481 region in which the guest needs to run. Revisit this. */
482 #define USE_STATIC_CODE_GEN_BUFFER
483 #endif
484
485 /* ??? Should configure for this, not list operating systems here. */
486 #if (defined(__linux__) \
487 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
488 || defined(__DragonFly__) || defined(__OpenBSD__) \
489 || defined(__NetBSD__))
490 # define USE_MMAP
491 #endif
492
493 /* Minimum size of the code gen buffer. This number is randomly chosen,
494 but not so small that we can't have a fair number of TB's live. */
495 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
496
497 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
498 indicated, this is constrained by the range of direct branches on the
499 host cpu, as used by the TCG implementation of goto_tb. */
500 #if defined(__x86_64__)
501 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
502 #elif defined(__sparc__)
503 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
504 #elif defined(__arm__)
505 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
506 #elif defined(__s390x__)
507 /* We have a +- 4GB range on the branches; leave some slop. */
508 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
509 #else
510 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
511 #endif
512
513 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
514
515 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
516 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
517 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
518
519 static inline size_t size_code_gen_buffer(size_t tb_size)
520 {
521 /* Size the buffer. */
522 if (tb_size == 0) {
523 #ifdef USE_STATIC_CODE_GEN_BUFFER
524 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
525 #else
526 /* ??? Needs adjustments. */
527 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
528 static buffer, we could size this on RESERVED_VA, on the text
529 segment size of the executable, or continue to use the default. */
530 tb_size = (unsigned long)(ram_size / 4);
531 #endif
532 }
533 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
534 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
535 }
536 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
537 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
538 }
539 code_gen_buffer_size = tb_size;
540 return tb_size;
541 }
542
543 #ifdef USE_STATIC_CODE_GEN_BUFFER
544 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
545 __attribute__((aligned(CODE_GEN_ALIGN)));
546
/* Static-buffer variant: the buffer already exists in the data
   segment; just make it executable. */
static inline void *alloc_code_gen_buffer(void)
{
    map_exec(static_code_gen_buffer, code_gen_buffer_size);
    return static_code_gen_buffer;
}
552 #elif defined(USE_MMAP)
/* mmap variant: allocate a fresh executable buffer, steering its
   placement per host so generated direct branches stay in range.
   Returns NULL on failure. */
static inline void *alloc_code_gen_buffer(void)
{
    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
    uintptr_t start = 0;
    void *buf;

    /* Constrain the position of the buffer based on the host cpu.
       Note that these addresses are chosen in concert with the
       addresses assigned in the relevant linker script file.  */
# if defined(__PIE__) || defined(__PIC__)
    /* Don't bother setting a preferred location if we're building
       a position-independent executable.  We're more likely to get
       an address near the main executable if we let the kernel
       choose the address.  */
# elif defined(__x86_64__) && defined(MAP_32BIT)
    /* Force the memory down into low memory with the executable.
       Leave the choice of exact location with the kernel.  */
    flags |= MAP_32BIT;
    /* Cannot expect to map more than 800MB in low memory.  */
    if (code_gen_buffer_size > 800u * 1024 * 1024) {
        code_gen_buffer_size = 800u * 1024 * 1024;
    }
# elif defined(__sparc__)
    start = 0x40000000ul;
# elif defined(__s390x__)
    start = 0x90000000ul;
# endif

    buf = mmap((void *)start, code_gen_buffer_size,
               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
    /* Normalise mmap's MAP_FAILED to NULL for the caller. */
    return buf == MAP_FAILED ? NULL : buf;
}
585 #else
/* Portable fallback: heap-allocate the buffer, then flip on execute
   permission for the whole range. */
static inline void *alloc_code_gen_buffer(void)
{
    void *buf = g_malloc(code_gen_buffer_size);
    if (buf) {
        map_exec(buf, code_gen_buffer_size);
    }
    return buf;
}
594 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
595
/* Allocate the translation buffer, carve the prologue area out of its
   tail and size the TranslationBlock array to match.  Exits the
   process if the buffer cannot be allocated. */
static inline void code_gen_alloc(size_t tb_size)
{
    code_gen_buffer_size = size_code_gen_buffer(tb_size);
    code_gen_buffer = alloc_code_gen_buffer();
    if (code_gen_buffer == NULL) {
        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
        exit(1);
    }

    /* Steal room for the prologue at the end of the buffer.  This ensures
       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
       from TB's to the prologue are going to be in range.  It also means
       that we don't need to mark (additional) portions of the data segment
       as executable.  */
    code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
    code_gen_buffer_size -= 1024;

    /* Keep enough slack that one maximally sized TB always fits. */
    code_gen_buffer_max_size = code_gen_buffer_size -
        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
}
618
/* Must be called before using the QEMU cpus. 'tb_size' is the size
   (in bytes) allocated to the translation buffer. Zero means default
   size. */
void tcg_exec_init(unsigned long tb_size)
{
    cpu_gen_init();
    code_gen_alloc(tb_size);
    code_gen_ptr = code_gen_buffer;
    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
    page_init();
#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
    /* There's no guest base to take into account, so go ahead and
       initialize the prologue now.  */
    tcg_prologue_init(&tcg_ctx);
#endif
}
635
636 bool tcg_enabled(void)
637 {
638 return code_gen_buffer != NULL;
639 }
640
/* One-time initialisation of the memory and I/O maps (system
   emulation only; a no-op for user-mode emulation). */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
648
649 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
650
/* vmstate post-load hook: scrub an obsolete interrupt bit and flush
   the TLB, since the loaded mappings may not match the current ones.
   Always returns 0 (success). */
static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUArchState *env = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    env->interrupt_request &= ~0x01;
    tlb_flush(env, 1);

    return 0;
}
662
/* Migration description of the architecture-independent CPU fields. */
static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUArchState),
        VMSTATE_UINT32(interrupt_request, CPUArchState),
        VMSTATE_END_OF_LIST()
    }
};
675 #endif
676
677 CPUArchState *qemu_get_cpu(int cpu)
678 {
679 CPUArchState *env = first_cpu;
680
681 while (env) {
682 if (env->cpu_index == cpu)
683 break;
684 env = env->next_cpu;
685 }
686
687 return env;
688 }
689
/* Register a freshly created CPU: append it to the global CPU list,
   assign it the next free cpu_index and hook it into savevm/vmstate
   so its common state is migrated. */
void cpu_exec_init(CPUArchState *env)
{
    CPUArchState **penv;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    env->next_cpu = NULL;
    penv = &first_cpu;
    cpu_index = 0;
    /* Walk to the end of the list, counting existing CPUs on the way. */
    while (*penv != NULL) {
        penv = &(*penv)->next_cpu;
        cpu_index++;
    }
    env->cpu_index = cpu_index;
    env->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    env->thread_id = qemu_get_thread_id();
#endif
    *penv = env;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
#endif
}
722
/* Allocate a new translation block. Flush the translation buffer if
   too many translation blocks or too much generated code. */
static TranslationBlock *tb_alloc(target_ulong pc)
{
    TranslationBlock *tb;

    /* A NULL return tells the caller that tb_flush() is needed first. */
    if (nb_tbs >= code_gen_max_blocks ||
        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
        return NULL;
    tb = &tbs[nb_tbs++];
    tb->pc = pc;
    tb->cflags = 0;
    return tb;
}
737
/* Release a TB obtained from tb_alloc(); only reclaims space when the
   TB is the most recently allocated one. */
void tb_free(TranslationBlock *tb)
{
    /* In practice this is mostly used for single use temporary TB
       Ignore the hard cases and just back up if this TB happens to
       be the last one generated.  */
    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
        code_gen_ptr = tb->tc_ptr;
        nb_tbs--;
    }
}
748
749 static inline void invalidate_page_bitmap(PageDesc *p)
750 {
751 if (p->code_bitmap) {
752 g_free(p->code_bitmap);
753 p->code_bitmap = NULL;
754 }
755 p->code_write_count = 0;
756 }
757
/* Set to NULL all the 'first_tb' fields in all PageDescs. */

/* Recursive worker: clear first_tb and the code bitmap in every
   PageDesc reachable from *lp.  At level 0, *lp points at an array of
   PageDesc; above that, at an array of child pointers. */
static void page_flush_tb_1 (int level, void **lp)
{
    int i;

    if (*lp == NULL) {
        return;
    }
    if (level == 0) {
        PageDesc *pd = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            pd[i].first_tb = NULL;
            invalidate_page_bitmap(pd + i);
        }
    } else {
        void **pp = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            page_flush_tb_1 (level - 1, pp + i);
        }
    }
}
780
781 static void page_flush_tb(void)
782 {
783 int i;
784 for (i = 0; i < V_L1_SIZE; i++) {
785 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
786 }
787 }
788
/* flush all the translation blocks */
/* XXX: tb_flush is currently not thread safe */
void tb_flush(CPUArchState *env1)
{
    CPUArchState *env;
#if defined(DEBUG_FLUSH)
    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
           (unsigned long)(code_gen_ptr - code_gen_buffer),
           nb_tbs, nb_tbs > 0 ?
           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
#endif
    if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
        cpu_abort(env1, "Internal error: code buffer overflow\n");

    nb_tbs = 0;

    /* Every CPU caches TB pointers; those are now all stale. */
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
    }

    memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
    page_flush_tb();

    /* Restart code generation from the beginning of the buffer. */
    code_gen_ptr = code_gen_buffer;
    /* XXX: flush processor icache at this point if cache flush is
       expensive */
    tb_flush_count++;
}
817
818 #ifdef DEBUG_TB_CHECK
819
/* Debug aid (DEBUG_TB_CHECK): verify that no TB left in the physical
   hash table still intersects the invalidated page of 'address'. */
static void tb_invalidate_check(target_ulong address)
{
    TranslationBlock *tb;
    int i;
    address &= TARGET_PAGE_MASK;
    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            /* Complain if [address, address+page) overlaps this TB. */
            if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
                  address >= tb->pc + tb->size)) {
                printf("ERROR invalidate: address=" TARGET_FMT_lx
                       " PC=%08lx size=%04x\n",
                       address, (long)tb->pc, tb->size);
            }
        }
    }
}
836
/* verify that all the pages have correct rights for code */
static void tb_page_check(void)
{
    TranslationBlock *tb;
    int i, flags1, flags2;

    for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
        for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
            flags1 = page_get_flags(tb->pc);
            flags2 = page_get_flags(tb->pc + tb->size - 1);
            /* Pages holding translated code must not be writable. */
            if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
                       (long)tb->pc, tb->size, flags1, flags2);
            }
        }
    }
}
854
855 #endif
856
/* invalidate one TB */
/* Unlink 'tb' from a singly linked list whose "next" pointer lives at
   byte offset 'next_offset' inside each TranslationBlock.  The caller
   guarantees 'tb' is on the list. */
static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
                             int next_offset)
{
    TranslationBlock *tb1;
    for(;;) {
        tb1 = *ptb;
        if (tb1 == tb) {
            *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
            break;
        }
        ptb = (TranslationBlock **)((char *)tb1 + next_offset);
    }
}
871
/* Unlink 'tb' from a page's TB list.  The low two bits of each list
   pointer are a tag selecting which of the TB's two page links to
   follow next; they are masked off before dereferencing. */
static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
{
    TranslationBlock *tb1;
    unsigned int n1;

    for(;;) {
        tb1 = *ptb;
        n1 = (uintptr_t)tb1 & 3;
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
        if (tb1 == tb) {
            *ptb = tb1->page_next[n1];
            break;
        }
        ptb = &tb1->page_next[n1];
    }
}
888
/* Remove jump slot 'n' of 'tb' from the circular list of TBs jumping
   to the same target.  The low two bits of each pointer tag which
   slot of the pointed-to TB the link came from; tag value 2 marks the
   list head. */
static inline void tb_jmp_remove(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, **ptb;
    unsigned int n1;

    ptb = &tb->jmp_next[n];
    tb1 = *ptb;
    if (tb1) {
        /* find tb(n) in circular list */
        for(;;) {
            tb1 = *ptb;
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            if (n1 == 2) {
                ptb = &tb1->jmp_first;
            } else {
                ptb = &tb1->jmp_next[n1];
            }
        }
        /* now we can suppress tb(n) from the list */
        *ptb = tb->jmp_next[n];

        tb->jmp_next[n] = NULL;
    }
}
916
/* reset the jump entry 'n' of a TB so that it is not chained to
   another TB */
static inline void tb_reset_jump(TranslationBlock *tb, int n)
{
    /* Retarget the jump at its own fall-through address. */
    tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
}
923
/* Remove 'tb' from every lookup structure: the physical hash table,
   the per-page TB lists, each CPU's jump cache and the jump chains.
   Pages equal to 'page_addr' are skipped, as the caller is already
   rewriting their lists (pass -1 to process both pages). */
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
    CPUArchState *env;
    PageDesc *p;
    unsigned int h, n1;
    tb_page_addr_t phys_pc;
    TranslationBlock *tb1, *tb2;

    /* remove the TB from the hash list */
    phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
    h = tb_phys_hash_func(phys_pc);
    tb_remove(&tb_phys_hash[h], tb,
              offsetof(TranslationBlock, phys_hash_next));

    /* remove the TB from the page list */
    if (tb->page_addr[0] != page_addr) {
        p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }
    if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
        p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
        tb_page_remove(&p->first_tb, tb);
        invalidate_page_bitmap(p);
    }

    tb_invalidated_flag = 1;

    /* remove the TB from the hash list */
    h = tb_jmp_cache_hash_func(tb->pc);
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        if (env->tb_jmp_cache[h] == tb)
            env->tb_jmp_cache[h] = NULL;
    }

    /* suppress this TB from the two jump lists */
    tb_jmp_remove(tb, 0);
    tb_jmp_remove(tb, 1);

    /* suppress any remaining jumps to this TB */
    tb1 = tb->jmp_first;
    for(;;) {
        n1 = (uintptr_t)tb1 & 3;
        if (n1 == 2)
            break;
        tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
        tb2 = tb1->jmp_next[n1];
        tb_reset_jump(tb1, n1);
        tb1->jmp_next[n1] = NULL;
        tb1 = tb2;
    }
    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */

    tb_phys_invalidate_count++;
}
979
/* Set 'len' consecutive bits in the bitmap 'tab', starting at bit
   index 'start'.  Bit i lives in byte tab[i >> 3] at position
   (i & 7); len == 0 sets nothing. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int bit;
    int end = start + len;

    for (bit = start; bit < end; bit++) {
        tab[bit >> 3] |= (uint8_t)(1u << (bit & 7));
    }
}
1006
/* Build the bitmap of bytes in this page covered by translated code,
   used to filter self-modifying-code writes cheaply. */
static void build_page_bitmap(PageDesc *p)
{
    int n, tb_start, tb_end;
    TranslationBlock *tb;

    p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);

    tb = p->first_tb;
    while (tb != NULL) {
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->pc & ~TARGET_PAGE_MASK;
            tb_end = tb_start + tb->size;
            if (tb_end > TARGET_PAGE_SIZE)
                tb_end = TARGET_PAGE_SIZE;
        } else {
            /* This page is the TB's second page: the TB covers from
               the start of the page to where it ends. */
            tb_start = 0;
            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
        tb = tb->page_next[n];
    }
}
1034
/* Translate the guest code at (pc, cs_base, flags) into a new TB and
   link it into the lookup tables.  If the translation buffer is full,
   the whole code cache is flushed first. */
TranslationBlock *tb_gen_code(CPUArchState *env,
                              target_ulong pc, target_ulong cs_base,
                              int flags, int cflags)
{
    TranslationBlock *tb;
    uint8_t *tc_ptr;
    tb_page_addr_t phys_pc, phys_page2;
    target_ulong virt_page2;
    int code_gen_size;

    phys_pc = get_page_addr_code(env, pc);
    tb = tb_alloc(pc);
    if (!tb) {
        /* flush must be done */
        tb_flush(env);
        /* cannot fail at this point */
        tb = tb_alloc(pc);
        /* Don't forget to invalidate previous TB info.  */
        tb_invalidated_flag = 1;
    }
    tc_ptr = code_gen_ptr;
    tb->tc_ptr = tc_ptr;
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    cpu_gen_code(env, tb, &code_gen_size);
    /* Advance the output pointer past the generated code, keeping it
       aligned for the next TB. */
    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));

    /* check next page if needed */
    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
    phys_page2 = -1;
    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
        phys_page2 = get_page_addr_code(env, virt_page2);
    }
    tb_link_page(tb, phys_pc, phys_page2);
    return tb;
}
1073
1074 /*
1075 * Invalidate all TBs which intersect with the target physical address range
1076 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1077 * 'is_cpu_write_access' should be true if called from a real cpu write
1078 * access: the virtual CPU will exit the current TB if code is modified inside
1079 * this TB.
1080 */
1081 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1082 int is_cpu_write_access)
1083 {
1084 while (start < end) {
1085 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1086 start &= TARGET_PAGE_MASK;
1087 start += TARGET_PAGE_SIZE;
1088 }
1089 }
1090
/*
 * Invalidate all TBs which intersect with the target physical address range
 * [start;end[. NOTE: start and end must refer to the *same* physical page.
 * 'is_cpu_write_access' should be true if called from a real cpu write
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 */
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                                   int is_cpu_write_access)
{
    TranslationBlock *tb, *tb_next, *saved_tb;
    CPUArchState *env = cpu_single_env;
    tb_page_addr_t tb_start, tb_end;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    /* Precise self-modifying-code support: lazily locate the TB that
       issued the write so its execution can be aborted and restarted
       if it modified itself. */
    int current_tb_not_found = is_cpu_write_access;
    TranslationBlock *current_tb = NULL;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif /* TARGET_HAS_PRECISE_SMC */

    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p)
        return;
    /* After enough write faults on this code page, pay for a bitmap of
       which bytes hold translated code so future writes can be filtered
       cheaply (see tb_invalidate_phys_page_fast). */
    if (!p->code_bitmap &&
        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
        is_cpu_write_access) {
        /* build code bitmap */
        build_page_bitmap(p);
    }

    /* we remove all the TBs in the range [start, end[ */
    /* XXX: see if in some cases it could be faster to invalidate all the code */
    tb = p->first_tb;
    while (tb != NULL) {
        /* the low 2 bits of the list pointer encode which of the TB's
           two page slots links it into this page's list */
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
        tb_next = tb->page_next[n];
        /* NOTE: this is subtle as a TB may span two physical pages */
        if (n == 0) {
            /* NOTE: tb_end may be after the end of the page, but
               it is not a problem */
            tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
            tb_end = tb_start + tb->size;
        } else {
            tb_start = tb->page_addr[1];
            tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
        }
        /* overlap test between [tb_start, tb_end[ and [start, end[ */
        if (!(tb_end <= start || tb_start >= end)) {
#ifdef TARGET_HAS_PRECISE_SMC
            if (current_tb_not_found) {
                current_tb_not_found = 0;
                current_tb = NULL;
                if (env->mem_io_pc) {
                    /* now we have a real cpu fault */
                    current_tb = tb_find_pc(env->mem_io_pc);
                }
            }
            if (current_tb == tb &&
                (current_tb->cflags & CF_COUNT_MASK) != 1) {
                /* If we are modifying the current TB, we must stop
                its execution. We could be more precise by checking
                that the modification is after the current PC, but it
                would require a specialized function to partially
                restore the CPU state */

                current_tb_modified = 1;
                cpu_restore_state(current_tb, env, env->mem_io_pc);
                cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                     &current_flags);
            }
#endif /* TARGET_HAS_PRECISE_SMC */
            /* we need to do that to handle the case where a signal
               occurs while doing tb_phys_invalidate() */
            saved_tb = NULL;
            if (env) {
                saved_tb = env->current_tb;
                env->current_tb = NULL;
            }
            tb_phys_invalidate(tb, -1);
            if (env) {
                env->current_tb = saved_tb;
                /* deliver any interrupt that arrived while current_tb
                   was cleared */
                if (env->interrupt_request && env->current_tb)
                    cpu_interrupt(env, env->interrupt_request);
            }
        }
        tb = tb_next;
    }
#if !defined(CONFIG_USER_ONLY)
    /* if no code remaining, no need to continue to use slow writes */
    if (!p->first_tb) {
        invalidate_page_bitmap(p);
        if (is_cpu_write_access) {
            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
        }
    }
#endif
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, NULL);
    }
#endif
}
1202
1203 /* len must be <= 8 and start must be a multiple of len */
1204 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1205 {
1206 PageDesc *p;
1207 int offset, b;
1208 #if 0
1209 if (1) {
1210 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1211 cpu_single_env->mem_io_vaddr, len,
1212 cpu_single_env->eip,
1213 cpu_single_env->eip +
1214 (intptr_t)cpu_single_env->segs[R_CS].base);
1215 }
1216 #endif
1217 p = page_find(start >> TARGET_PAGE_BITS);
1218 if (!p)
1219 return;
1220 if (p->code_bitmap) {
1221 offset = start & ~TARGET_PAGE_MASK;
1222 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1223 if (b & ((1 << len) - 1))
1224 goto do_invalidate;
1225 } else {
1226 do_invalidate:
1227 tb_invalidate_phys_page_range(start, start + len, 1);
1228 }
1229 }
1230
#if !defined(CONFIG_SOFTMMU)
/* Invalidate every TB on the page containing 'addr' (user-mode only).
   'pc' is the host PC of the faulting write, used with precise SMC to
   detect a TB modifying itself; 'puc' is the signal context forwarded
   to cpu_resume_from_signal(). */
static void tb_invalidate_phys_page(tb_page_addr_t addr,
                                    uintptr_t pc, void *puc)
{
    TranslationBlock *tb;
    PageDesc *p;
    int n;
#ifdef TARGET_HAS_PRECISE_SMC
    TranslationBlock *current_tb = NULL;
    CPUArchState *env = cpu_single_env;
    int current_tb_modified = 0;
    target_ulong current_pc = 0;
    target_ulong current_cs_base = 0;
    int current_flags = 0;
#endif

    addr &= TARGET_PAGE_MASK;
    p = page_find(addr >> TARGET_PAGE_BITS);
    if (!p)
        return;
    tb = p->first_tb;
#ifdef TARGET_HAS_PRECISE_SMC
    if (tb && pc != 0) {
        /* map the faulting host PC back to the TB being executed */
        current_tb = tb_find_pc(pc);
    }
#endif
    while (tb != NULL) {
        /* low 2 bits of the list pointer select which of the TB's two
           page slots links it into this page's list */
        n = (uintptr_t)tb & 3;
        tb = (TranslationBlock *)((uintptr_t)tb & ~3);
#ifdef TARGET_HAS_PRECISE_SMC
        if (current_tb == tb &&
            (current_tb->cflags & CF_COUNT_MASK) != 1) {
            /* If we are modifying the current TB, we must stop
               its execution. We could be more precise by checking
               that the modification is after the current PC, but it
               would require a specialized function to partially
               restore the CPU state */

            current_tb_modified = 1;
            cpu_restore_state(current_tb, env, pc);
            cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                 &current_flags);
        }
#endif /* TARGET_HAS_PRECISE_SMC */
        tb_phys_invalidate(tb, addr);
        tb = tb->page_next[n];
    }
    p->first_tb = NULL;
#ifdef TARGET_HAS_PRECISE_SMC
    if (current_tb_modified) {
        /* we generate a block containing just the instruction
           modifying the memory. It will ensure that it cannot modify
           itself */
        env->current_tb = NULL;
        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
        cpu_resume_from_signal(env, puc);
    }
#endif
}
#endif
1291
/* add the tb in the target page and protect it if necessary */
static inline void tb_alloc_page(TranslationBlock *tb,
                                 unsigned int n, tb_page_addr_t page_addr)
{
    PageDesc *p;
#ifndef CONFIG_USER_ONLY
    bool page_already_protected;
#endif

    /* Link the TB into the page's list through slot 'n' (0 or 1);
       the slot index is encoded in the low bits of the list pointer. */
    tb->page_addr[n] = page_addr;
    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
    tb->page_next[n] = p->first_tb;
#ifndef CONFIG_USER_ONLY
    page_already_protected = p->first_tb != NULL;
#endif
    p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
    /* the bitmap of translated bytes is stale once a new TB is added */
    invalidate_page_bitmap(p);

#if defined(TARGET_HAS_SMC) || 1

#if defined(CONFIG_USER_ONLY)
    if (p->flags & PAGE_WRITE) {
        target_ulong addr;
        PageDesc *p2;
        int prot;

        /* force the host page as non writable (writes will have a
           page fault + mprotect overhead) */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        /* a single host page may cover several target pages: collect
           their flags and clear PAGE_WRITE on each of them */
        for(addr = page_addr; addr < page_addr + qemu_host_page_size;
            addr += TARGET_PAGE_SIZE) {

            p2 = page_find (addr >> TARGET_PAGE_BITS);
            if (!p2)
                continue;
            prot |= p2->flags;
            p2->flags &= ~PAGE_WRITE;
        }
        mprotect(g2h(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
#ifdef DEBUG_TB_INVALIDATE
        printf("protecting code page: 0x" TARGET_FMT_lx "\n",
               page_addr);
#endif
    }
#else
    /* if some code is already present, then the pages are already
       protected. So we handle the case where only the first TB is
       allocated in a physical page */
    if (!page_already_protected) {
        tlb_protect_code(page_addr);
    }
#endif

#endif /* TARGET_HAS_SMC */
}
1349
/* add a new TB and link it to the physical page tables. phys_page2 is
   (-1) to indicate that only one page contains the TB. */
void tb_link_page(TranslationBlock *tb,
                  tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
{
    unsigned int h;
    TranslationBlock **ptb;

    /* Grab the mmap lock to stop another thread invalidating this TB
       before we are done. */
    mmap_lock();
    /* add in the physical hash table */
    h = tb_phys_hash_func(phys_pc);
    ptb = &tb_phys_hash[h];
    tb->phys_hash_next = *ptb;
    *ptb = tb;

    /* add in the page list */
    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
    if (phys_page2 != -1)
        tb_alloc_page(tb, 1, phys_page2);
    else
        tb->page_addr[1] = -1;

    /* jmp_first is a tagged pointer; tag value 2 marks the head of the
       circular list of TBs jumping into this one */
    tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
    tb->jmp_next[0] = NULL;
    tb->jmp_next[1] = NULL;

    /* init original jump addresses */
    /* offset 0xffff means the jump slot is unused */
    if (tb->tb_next_offset[0] != 0xffff)
        tb_reset_jump(tb, 0);
    if (tb->tb_next_offset[1] != 0xffff)
        tb_reset_jump(tb, 1);

#ifdef DEBUG_TB_CHECK
    tb_page_check();
#endif
    mmap_unlock();
}
1389
1390 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1391 tb[1].tc_ptr. Return NULL if not found */
1392 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1393 {
1394 int m_min, m_max, m;
1395 uintptr_t v;
1396 TranslationBlock *tb;
1397
1398 if (nb_tbs <= 0)
1399 return NULL;
1400 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1401 tc_ptr >= (uintptr_t)code_gen_ptr) {
1402 return NULL;
1403 }
1404 /* binary search (cf Knuth) */
1405 m_min = 0;
1406 m_max = nb_tbs - 1;
1407 while (m_min <= m_max) {
1408 m = (m_min + m_max) >> 1;
1409 tb = &tbs[m];
1410 v = (uintptr_t)tb->tc_ptr;
1411 if (v == tc_ptr)
1412 return tb;
1413 else if (tc_ptr < v) {
1414 m_max = m - 1;
1415 } else {
1416 m_min = m + 1;
1417 }
1418 }
1419 return &tbs[m_max];
1420 }
1421
1422 static void tb_reset_jump_recursive(TranslationBlock *tb);
1423
/* Unlink tb's outgoing jump slot 'n' (0 or 1) from the target TB's
   list of incoming jumps, reset the jump in the generated code, then
   recurse into the target so the whole chain is unlinked. */
static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
{
    TranslationBlock *tb1, *tb_next, **ptb;
    unsigned int n1;

    tb1 = tb->jmp_next[n];
    if (tb1 != NULL) {
        /* find head of list */
        for(;;) {
            /* low 2 bits tag the slot; tag 2 marks the list head */
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == 2)
                break;
            tb1 = tb1->jmp_next[n1];
        }
        /* we are now sure now that tb jumps to tb1 */
        tb_next = tb1;

        /* remove tb from the jmp_first list */
        ptb = &tb_next->jmp_first;
        for(;;) {
            tb1 = *ptb;
            n1 = (uintptr_t)tb1 & 3;
            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
            if (n1 == n && tb1 == tb)
                break;
            ptb = &tb1->jmp_next[n1];
        }
        /* splice tb out of the incoming-jump list */
        *ptb = tb->jmp_next[n];
        tb->jmp_next[n] = NULL;

        /* suppress the jump to next tb in generated code */
        tb_reset_jump(tb, n);

        /* suppress jumps in the tb on which we could have jumped */
        tb_reset_jump_recursive(tb_next);
    }
}
1462
1463 static void tb_reset_jump_recursive(TranslationBlock *tb)
1464 {
1465 tb_reset_jump_recursive2(tb, 0);
1466 tb_reset_jump_recursive2(tb, 1);
1467 }
1468
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Drop any translated code for the instruction at 'pc' so the
   breakpoint takes effect on the next translation (user-mode uses
   virtual addresses directly). */
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/* Invalidate translated code at a guest physical address (softmmu). */
void tb_invalidate_phys_addr(hwaddr addr)
{
    ram_addr_t ram_addr;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
    /* only RAM (or readable ROM devices) can contain translated code */
    if (!(memory_region_is_ram(section->mr)
          || (section->mr->rom_device && section->mr->readable))) {
        return;
    }
    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
        + memory_region_section_addr(section, addr);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}

static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    /* translate the virtual PC to a physical address first */
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
            (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */
1498
1499 #if defined(CONFIG_USER_ONLY)
/* User-mode build: watchpoints are not supported, removal is a no-op. */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)

{
}
1504
/* User-mode build: watchpoints are not supported. */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
1510 #else
1511 /* Add a watchpoint. */
1512 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1513 int flags, CPUWatchpoint **watchpoint)
1514 {
1515 target_ulong len_mask = ~(len - 1);
1516 CPUWatchpoint *wp;
1517
1518 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1519 if ((len & (len - 1)) || (addr & ~len_mask) ||
1520 len == 0 || len > TARGET_PAGE_SIZE) {
1521 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1522 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1523 return -EINVAL;
1524 }
1525 wp = g_malloc(sizeof(*wp));
1526
1527 wp->vaddr = addr;
1528 wp->len_mask = len_mask;
1529 wp->flags = flags;
1530
1531 /* keep all GDB-injected watchpoints in front */
1532 if (flags & BP_GDB)
1533 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1534 else
1535 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1536
1537 tlb_flush_page(env, addr);
1538
1539 if (watchpoint)
1540 *watchpoint = wp;
1541 return 0;
1542 }
1543
1544 /* Remove a specific watchpoint. */
1545 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1546 int flags)
1547 {
1548 target_ulong len_mask = ~(len - 1);
1549 CPUWatchpoint *wp;
1550
1551 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1552 if (addr == wp->vaddr && len_mask == wp->len_mask
1553 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1554 cpu_watchpoint_remove_by_ref(env, wp);
1555 return 0;
1556 }
1557 }
1558 return -ENOENT;
1559 }
1560
/* Remove a specific watchpoint by reference. */
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);

    /* flush before freeing: the TLB entry refers to watchpoint->vaddr */
    tlb_flush_page(env, watchpoint->vaddr);

    g_free(watchpoint);
}
1570
1571 /* Remove all matching watchpoints. */
1572 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1573 {
1574 CPUWatchpoint *wp, *next;
1575
1576 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1577 if (wp->flags & mask)
1578 cpu_watchpoint_remove_by_ref(env, wp);
1579 }
1580 }
1581 #endif
1582
1583 /* Add a breakpoint. */
1584 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1585 CPUBreakpoint **breakpoint)
1586 {
1587 #if defined(TARGET_HAS_ICE)
1588 CPUBreakpoint *bp;
1589
1590 bp = g_malloc(sizeof(*bp));
1591
1592 bp->pc = pc;
1593 bp->flags = flags;
1594
1595 /* keep all GDB-injected breakpoints in front */
1596 if (flags & BP_GDB)
1597 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1598 else
1599 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1600
1601 breakpoint_invalidate(env, pc);
1602
1603 if (breakpoint)
1604 *breakpoint = bp;
1605 return 0;
1606 #else
1607 return -ENOSYS;
1608 #endif
1609 }
1610
1611 /* Remove a specific breakpoint. */
1612 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1613 {
1614 #if defined(TARGET_HAS_ICE)
1615 CPUBreakpoint *bp;
1616
1617 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1618 if (bp->pc == pc && bp->flags == flags) {
1619 cpu_breakpoint_remove_by_ref(env, bp);
1620 return 0;
1621 }
1622 }
1623 return -ENOENT;
1624 #else
1625 return -ENOSYS;
1626 #endif
1627 }
1628
/* Remove a specific breakpoint by reference. */
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
{
#if defined(TARGET_HAS_ICE)
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);

    /* invalidate before freeing: breakpoint->pc is read here */
    breakpoint_invalidate(env, breakpoint->pc);

    g_free(breakpoint);
#endif
}
1640
1641 /* Remove all matching breakpoints. */
1642 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1643 {
1644 #if defined(TARGET_HAS_ICE)
1645 CPUBreakpoint *bp, *next;
1646
1647 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1648 if (bp->flags & mask)
1649 cpu_breakpoint_remove_by_ref(env, bp);
1650 }
1651 #endif
1652 }
1653
1654 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1655 CPU loop after each instruction */
1656 void cpu_single_step(CPUArchState *env, int enabled)
1657 {
1658 #if defined(TARGET_HAS_ICE)
1659 if (env->singlestep_enabled != enabled) {
1660 env->singlestep_enabled = enabled;
1661 if (kvm_enabled())
1662 kvm_update_guest_debug(env, 0);
1663 else {
1664 /* must flush all the translated code to avoid inconsistencies */
1665 /* XXX: only flush what is necessary */
1666 tb_flush(env);
1667 }
1668 }
1669 #endif
1670 }
1671
/* Force the cpu out of its current chain of translated blocks by
   unlinking the jumps of the TB it is executing. */
static void cpu_unlink_tb(CPUArchState *env)
{
    /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
       problem and hope the cpu will stop of its own accord. For userspace
       emulation this often isn't actually as bad as it sounds. Often
       signals are used primarily to interrupt blocking syscalls. */
    TranslationBlock *tb;
    /* serializes concurrent unchain attempts */
    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;

    spin_lock(&interrupt_lock);
    tb = env->current_tb;
    /* if the cpu is currently executing code, we must unlink it and
       all the potentially executing TB */
    if (tb) {
        env->current_tb = NULL;
        tb_reset_jump_recursive(tb);
    }
    spin_unlock(&interrupt_lock);
}
1691
1692 #ifndef CONFIG_USER_ONLY
/* mask must never be zero, except for A20 change call */
static void tcg_handle_interrupt(CPUArchState *env, int mask)
{
    int old_mask;

    old_mask = env->interrupt_request;
    env->interrupt_request |= mask;

    /*
     * If called from iothread context, wake the target cpu in
     * case its halted.
     */
    if (!qemu_cpu_is_self(env)) {
        qemu_cpu_kick(env);
        return;
    }

    if (use_icount) {
        /* force the cpu to stop counting instructions and leave the TB */
        env->icount_decr.u16.high = 0xffff;
        /* a new interrupt raised outside an I/O instruction would make
           deterministic instruction counting impossible */
        if (!can_do_io(env)
            && (mask & ~old_mask) != 0) {
            cpu_abort(env, "Raised interrupt while not in I/O function");
        }
    } else {
        cpu_unlink_tb(env);
    }
}
1720
1721 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1722
1723 #else /* CONFIG_USER_ONLY */
1724
/* User-mode interrupt delivery: record the pending bits and force the
   cpu out of its current translated block as soon as possible. */
void cpu_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request |= mask;
    cpu_unlink_tb(env);
}
1730 #endif /* CONFIG_USER_ONLY */
1731
/* Clear the given pending interrupt bits. */
void cpu_reset_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request &= ~mask;
}
1736
/* Request that the cpu leave its execution loop at the next
   opportunity. */
void cpu_exit(CPUArchState *env)
{
    env->exit_request = 1;
    cpu_unlink_tb(env);
}
1742
/* Report a fatal emulation error on stderr (and the log, if enabled),
   dump the CPU state and abort().  Never returns. */
void cpu_abort(CPUArchState *env, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    /* 'ap' is consumed by vfprintf below; keep a copy for the log */
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        /* restore the default SIGABRT handler so abort() actually
           terminates even if the guest installed its own handler */
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}
1774
1775 CPUArchState *cpu_copy(CPUArchState *env)
1776 {
1777 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1778 CPUArchState *next_cpu = new_env->next_cpu;
1779 int cpu_index = new_env->cpu_index;
1780 #if defined(TARGET_HAS_ICE)
1781 CPUBreakpoint *bp;
1782 CPUWatchpoint *wp;
1783 #endif
1784
1785 memcpy(new_env, env, sizeof(CPUArchState));
1786
1787 /* Preserve chaining and index. */
1788 new_env->next_cpu = next_cpu;
1789 new_env->cpu_index = cpu_index;
1790
1791 /* Clone all break/watchpoints.
1792 Note: Once we support ptrace with hw-debug register access, make sure
1793 BP_CPU break/watchpoints are handled correctly on clone. */
1794 QTAILQ_INIT(&env->breakpoints);
1795 QTAILQ_INIT(&env->watchpoints);
1796 #if defined(TARGET_HAS_ICE)
1797 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1798 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1799 }
1800 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1801 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1802 wp->flags, NULL);
1803 }
1804 #endif
1805
1806 return new_env;
1807 }
1808
1809 #if !defined(CONFIG_USER_ONLY)
1810 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1811 {
1812 unsigned int i;
1813
1814 /* Discard jump cache entries for any tb which might potentially
1815 overlap the flushed page. */
1816 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1817 memset (&env->tb_jmp_cache[i], 0,
1818 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1819
1820 i = tb_jmp_cache_hash_page(addr);
1821 memset (&env->tb_jmp_cache[i], 0,
1822 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1823 }
1824
/* Re-arm dirty tracking for the RAM range [start, end[: redirect the
   TLB entries covering it back at the slow path so the next write
   sets the dirty bit again.  'length' equals end - start. */
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
                                      uintptr_t length)
{
    uintptr_t start1;

    /* we modify the TLB cache so that the dirty bit will be set again
       when accessing the range */
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
    /* Check that we don't span multiple blocks - this breaks the
       address comparisons below. */
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
        != (end - 1) - start) {
        abort();
    }
    cpu_tlb_reset_dirty_all(start1, length);

}
1842
1843 /* Note: start and end must be within the same ram block. */
1844 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1845 int dirty_flags)
1846 {
1847 uintptr_t length;
1848
1849 start &= TARGET_PAGE_MASK;
1850 end = TARGET_PAGE_ALIGN(end);
1851
1852 length = end - start;
1853 if (length == 0)
1854 return;
1855 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1856
1857 if (tcg_enabled()) {
1858 tlb_reset_dirty_range_all(start, end, length);
1859 }
1860 }
1861
1862 int cpu_physical_memory_set_dirty_tracking(int enable)
1863 {
1864 int ret = 0;
1865 in_migration = enable;
1866 return ret;
1867 }
1868
/* Compute the iotlb value stored in a TLB entry for one page: for RAM
   it is the page's ram_addr (or'ed with notdirty/rom dispatch flags);
   for MMIO it is the section's index in phys_sections plus the offset
   within the section.  Pages carrying a watchpoint are forced through
   the watchpoint trap path via TLB_MMIO. */
hwaddr memory_region_section_get_iotlb(CPUArchState *env,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM. */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, paddr);
        if (!section->readonly) {
            iotlb |= phys_section_notdirty;
        } else {
            iotlb |= phys_section_rom;
        }
    } else {
        /* IO handlers are currently passed a physical address.
           It would be nice to pass an offset from the base address
           of that region. This would avoid having to special case RAM,
           and avoid full address decoding in every device.
           We can't use the high bits of pd for this because
           IO_MEM_ROMD uses these as a ram address. */
        iotlb = section - phys_sections;
        iotlb += memory_region_section_addr(section, paddr);
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines. */
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = phys_section_watch + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
1914
1915 #else
1916 /*
1917 * Walks guest process memory "regions" one by one
1918 * and calls callback function 'fn' for each region.
1919 */
1920
/* Accumulator used while coalescing adjacent pages with identical
   protection into maximal regions for the walk callback. */
struct walk_memory_regions_data
{
    walk_memory_regions_fn fn; /* user callback, invoked per region */
    void *priv;                /* opaque argument forwarded to fn */
    uintptr_t start;           /* start of open region, -1ul if none */
    int prot;                  /* protection flags of the open region */
};
1928
1929 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1930 abi_ulong end, int new_prot)
1931 {
1932 if (data->start != -1ul) {
1933 int rc = data->fn(data->priv, data->start, end, data->prot);
1934 if (rc != 0) {
1935 return rc;
1936 }
1937 }
1938
1939 data->start = (new_prot ? end : -1ul);
1940 data->prot = new_prot;
1941
1942 return 0;
1943 }
1944
/* Recursively walk one level of the guest page table.  'base' is the
   guest address covered by the first entry of *lp; level 0 entries are
   PageDesc arrays, higher levels point at child tables. */
static int walk_memory_regions_1(struct walk_memory_regions_data *data,
                                 abi_ulong base, int level, void **lp)
{
    abi_ulong pa;
    int i, rc;

    if (*lp == NULL) {
        /* hole in the map: close any open region at 'base' */
        return walk_memory_regions_end(data, base, 0);
    }

    if (level == 0) {
        PageDesc *pd = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            int prot = pd[i].flags;

            pa = base | (i << TARGET_PAGE_BITS);
            if (prot != data->prot) {
                /* protection changed: flush the previous region */
                rc = walk_memory_regions_end(data, pa, prot);
                if (rc != 0) {
                    return rc;
                }
            }
        }
    } else {
        void **pp = *lp;
        for (i = 0; i < L2_SIZE; ++i) {
            pa = base | ((abi_ulong)i <<
                         (TARGET_PAGE_BITS + L2_BITS * level));
            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
            if (rc != 0) {
                return rc;
            }
        }
    }

    return 0;
}
1982
1983 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1984 {
1985 struct walk_memory_regions_data data;
1986 uintptr_t i;
1987
1988 data.fn = fn;
1989 data.priv = priv;
1990 data.start = -1ul;
1991 data.prot = 0;
1992
1993 for (i = 0; i < V_L1_SIZE; i++) {
1994 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1995 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
1996 if (rc != 0) {
1997 return rc;
1998 }
1999 }
2000
2001 return walk_memory_regions_end(&data, 0, 0);
2002 }
2003
2004 static int dump_region(void *priv, abi_ulong start,
2005 abi_ulong end, unsigned long prot)
2006 {
2007 FILE *f = (FILE *)priv;
2008
2009 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2010 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2011 start, end, end - start,
2012 ((prot & PAGE_READ) ? 'r' : '-'),
2013 ((prot & PAGE_WRITE) ? 'w' : '-'),
2014 ((prot & PAGE_EXEC) ? 'x' : '-'));
2015
2016 return (0);
2017 }
2018
2019 /* dump memory mappings */
2020 void page_dump(FILE *f)
2021 {
2022 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2023 "start", "end", "size", "prot");
2024 walk_memory_regions(f, dump_region);
2025 }
2026
2027 int page_get_flags(target_ulong address)
2028 {
2029 PageDesc *p;
2030
2031 p = page_find(address >> TARGET_PAGE_BITS);
2032 if (!p)
2033 return 0;
2034 return p->flags;
2035 }
2036
/* Modify the flags of a page and invalidate the code if necessary.
   The flag PAGE_WRITE_ORG is positioned automatically depending
   on PAGE_WRITE. The mmap_lock should already be held. */
void page_set_flags(target_ulong start, target_ulong end, int flags)
{
    target_ulong addr, len;

    /* This function should never be called with addresses outside the
       guest address space. If this assert fires, it probably indicates
       a missing call to h2g_valid. */
#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
#endif
    assert(start < end);

    start = start & TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    if (flags & PAGE_WRITE) {
        /* remember the page was originally writable so it can be
           restored after SMC write protection (see page_unprotect) */
        flags |= PAGE_WRITE_ORG;
    }

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);

        /* If the write protection bit is set, then we invalidate
           the code inside. */
        if (!(p->flags & PAGE_WRITE) &&
            (flags & PAGE_WRITE) &&
            p->first_tb) {
            tb_invalidate_phys_page(addr, 0, NULL);
        }
        p->flags = flags;
    }
}
2074
2075 int page_check_range(target_ulong start, target_ulong len, int flags)
2076 {
2077 PageDesc *p;
2078 target_ulong end;
2079 target_ulong addr;
2080
2081 /* This function should never be called with addresses outside the
2082 guest address space. If this assert fires, it probably indicates
2083 a missing call to h2g_valid. */
2084 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2085 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2086 #endif
2087
2088 if (len == 0) {
2089 return 0;
2090 }
2091 if (start + len - 1 < start) {
2092 /* We've wrapped around. */
2093 return -1;
2094 }
2095
2096 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2097 start = start & TARGET_PAGE_MASK;
2098
2099 for (addr = start, len = end - start;
2100 len != 0;
2101 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2102 p = page_find(addr >> TARGET_PAGE_BITS);
2103 if( !p )
2104 return -1;
2105 if( !(p->flags & PAGE_VALID) )
2106 return -1;
2107
2108 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2109 return -1;
2110 if (flags & PAGE_WRITE) {
2111 if (!(p->flags & PAGE_WRITE_ORG))
2112 return -1;
2113 /* unprotect the page if it was put read-only because it
2114 contains translated code */
2115 if (!(p->flags & PAGE_WRITE)) {
2116 if (!page_unprotect(addr, 0, NULL))
2117 return -1;
2118 }
2119 return 0;
2120 }
2121 }
2122 return 0;
2123 }
2124
/* called from signal handler: invalidate the code and unprotect the
   page. Return TRUE if the fault was successfully handled. */
int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
{
    unsigned int prot;
    PageDesc *p;
    target_ulong host_start, host_end, addr;

    /* Technically this isn't safe inside a signal handler. However we
       know this only ever happens in a synchronous SEGV handler, so in
       practice it seems to be ok. */
    mmap_lock();

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p) {
        mmap_unlock();
        return 0;
    }

    /* if the page was really writable, then we change its
       protection back to writable */
    if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
        host_start = address & qemu_host_page_mask;
        host_end = host_start + qemu_host_page_size;

        prot = 0;
        /* a host page may span several target pages: restore
           PAGE_WRITE on all of them and discard their translations */
        for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
            p = page_find(addr >> TARGET_PAGE_BITS);
            p->flags |= PAGE_WRITE;
            prot |= p->flags;

            /* and since the content will be modified, we must invalidate
               the corresponding translated code. */
            tb_invalidate_phys_page(addr, pc, puc);
#ifdef DEBUG_TB_CHECK
            tb_invalidate_check(addr);
#endif
        }
        mprotect((void *)g2h(host_start), qemu_host_page_size,
                 prot & PAGE_BITS);

        mmap_unlock();
        return 1;
    }
    mmap_unlock();
    return 0;
}
2172 #endif /* defined(CONFIG_USER_ONLY) */
2173
2174 #if !defined(CONFIG_USER_ONLY)
2175
/* offset of an address within its target page */
#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/* A target page split into regions finer than TARGET_PAGE_SIZE: the
   container dispatches each access via a per-byte section index. */
typedef struct subpage_t {
    MemoryRegion iomem; /* container region routing accesses here */
    hwaddr base;        /* guest-physical base address of the page */
    uint16_t sub_section[TARGET_PAGE_SIZE]; /* phys_sections index per byte */
} subpage_t;
2182
2183 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2184 uint16_t section);
2185 static subpage_t *subpage_init(hwaddr base);
2186 static void destroy_page_desc(uint16_t section_index)
2187 {
2188 MemoryRegionSection *section = &phys_sections[section_index];
2189 MemoryRegion *mr = section->mr;
2190
2191 if (mr->subpage) {
2192 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2193 memory_region_destroy(&subpage->iomem);
2194 g_free(subpage);
2195 }
2196 }
2197
/* Recursively tear down one (sub)tree of the physical page map.
   Level-0 entries hold leaf section indexes; higher levels reference
   child nodes in phys_map_nodes. */
static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
{
    unsigned i;
    PhysPageEntry *p;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = phys_map_nodes[lp->ptr];
    for (i = 0; i < L2_SIZE; ++i) {
        if (!p[i].is_leaf) {
            destroy_l2_mapping(&p[i], level - 1);
        } else {
            destroy_page_desc(p[i].ptr);
        }
    }
    /* mark this entry empty again */
    lp->is_leaf = 0;
    lp->ptr = PHYS_MAP_NODE_NIL;
}
2218
/* Tear down the entire physical page map of dispatch 'd' and release
   all map nodes for reuse. */
static void destroy_all_mappings(AddressSpaceDispatch *d)
{
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
    phys_map_nodes_reset();
}
2224
2225 static uint16_t phys_section_add(MemoryRegionSection *section)
2226 {
2227 if (phys_sections_nb == phys_sections_nb_alloc) {
2228 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2229 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2230 phys_sections_nb_alloc);
2231 }
2232 phys_sections[phys_sections_nb] = *section;
2233 return phys_sections_nb++;
2234 }
2235
/* Forget all registered sections.  Only the counter is reset; the
 * backing array is kept for reuse, and subpage containers are freed
 * separately via destroy_all_mappings(). */
static void phys_sections_clear(void)
{
    phys_sections_nb = 0;
}
2240
/* Enter a sub-page-sized MemoryRegionSection into the dispatch tree.
 * The containing guest page is backed by a subpage_t: the first
 * sub-page registration on a page allocates the container, later ones
 * reuse it. */
static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
                  & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
    /* Section spanning the whole containing page; only used when the
     * subpage container has to be created. */
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = TARGET_PAGE_SIZE,
    };
    hwaddr start, end;

    /* The page must either be unmapped or already a subpage container. */
    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(base);
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    /* Register the byte range [start, end] within the page. */
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + section->size - 1;
    subpage_register(subpage, start, end, phys_section_add(section));
}
2267
2268
2269 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2270 {
2271 hwaddr start_addr = section->offset_within_address_space;
2272 ram_addr_t size = section->size;
2273 hwaddr addr;
2274 uint16_t section_index = phys_section_add(section);
2275
2276 assert(size);
2277
2278 addr = start_addr;
2279 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2280 section_index);
2281 }
2282
/* MemoryListener region_add/region_nop hook: enter SECTION into the
 * address space's dispatch tree.  Unaligned head and tail fragments go
 * through subpages; the page-aligned middle is registered in bulk. */
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
    MemoryRegionSection now = *section, remain = *section;

    /* Unaligned (or shorter than a page) head fragment, up to the next
     * page boundary. */
    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
        || (now.size < TARGET_PAGE_SIZE)) {
        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space,
                       now.size);
        register_subpage(d, &now);
        remain.size -= now.size;
        remain.offset_within_address_space += now.size;
        remain.offset_within_region += now.size;
    }
    while (remain.size >= TARGET_PAGE_SIZE) {
        now = remain;
        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
            /* Region offset not page-aligned: must go page by page. */
            now.size = TARGET_PAGE_SIZE;
            register_subpage(d, &now);
        } else {
            /* Page-aligned bulk of the section. */
            now.size &= TARGET_PAGE_MASK;
            register_multipage(d, &now);
        }
        remain.size -= now.size;
        remain.offset_within_address_space += now.size;
        remain.offset_within_region += now.size;
    }
    /* Sub-page tail fragment, if any. */
    now = remain;
    if (now.size) {
        register_subpage(d, &now);
    }
}
2316
/* Drain KVM's coalesced-MMIO ring, if KVM is in use. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled()) {
        kvm_flush_coalesced_mmio_buffer();
    }
}
2322
2323 #if defined(__linux__) && !defined(TARGET_S390X)
2324
2325 #include <sys/vfs.h>
2326
2327 #define HUGETLBFS_MAGIC 0x958458f6
2328
2329 static long gethugepagesize(const char *path)
2330 {
2331 struct statfs fs;
2332 int ret;
2333
2334 do {
2335 ret = statfs(path, &fs);
2336 } while (ret != 0 && errno == EINTR);
2337
2338 if (ret != 0) {
2339 perror(path);
2340 return 0;
2341 }
2342
2343 if (fs.f_type != HUGETLBFS_MAGIC)
2344 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2345
2346 return fs.f_bsize;
2347 }
2348
/* Allocate BLOCK's backing memory from the hugetlbfs mount at PATH.
 * Creates an unlinked temporary file, sizes it, and mmaps it.  Returns
 * the mapped area and sets block->fd on success; returns NULL on any
 * failure so the caller can fall back to a normal allocation. */
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    char *filename;
    void *area;
    int fd;
#ifdef MAP_POPULATE
    int flags;
#endif
    unsigned long hpagesize;

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
        return NULL;
    }

    /* Refuse allocations smaller than one huge page. */
    if (memory < hpagesize) {
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
        return NULL;
    }

    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
        return NULL;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        free(filename);
        return NULL;
    }
    /* Unlink immediately: the mapping keeps the storage alive, and it
     * is reclaimed automatically when the fd is closed. */
    unlink(filename);
    free(filename);

    /* Round the size up to a multiple of the huge page size. */
    memory = (memory+hpagesize-1) & ~(hpagesize-1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory))
        perror("ftruncate");

#ifdef MAP_POPULATE
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
     * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
     * to sidestep this quirk.
     */
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
#else
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
#endif
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
        return (NULL);
    }
    block->fd = fd;
    return area;
}
2417 #endif
2418
/* Find a gap in the ram_addr_t space large enough for SIZE bytes.
 * Best-fit: among all gaps between existing blocks, pick the smallest
 * one that still fits.  Aborts if nothing fits. */
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    if (QLIST_EMPTY(&ram_list.blocks))
        return 0;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->length;

        /* Find the start of the closest block above `end`; the list is
         * not sorted by offset, hence the inner scan. */
        QLIST_FOREACH(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}
2451
2452 ram_addr_t last_ram_offset(void)
2453 {
2454 RAMBlock *block;
2455 ram_addr_t last = 0;
2456
2457 QLIST_FOREACH(block, &ram_list.blocks, next)
2458 last = MAX(last, block->offset + block->length);
2459
2460 return last;
2461 }
2462
2463 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2464 {
2465 int ret;
2466 QemuOpts *machine_opts;
2467
2468 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
2469 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2470 if (machine_opts &&
2471 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2472 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2473 if (ret) {
2474 perror("qemu_madvise");
2475 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2476 "but dump_guest_core=off specified\n");
2477 }
2478 }
2479 }
2480
2481 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2482 {
2483 RAMBlock *new_block, *block;
2484
2485 new_block = NULL;
2486 QLIST_FOREACH(block, &ram_list.blocks, next) {
2487 if (block->offset == addr) {
2488 new_block = block;
2489 break;
2490 }
2491 }
2492 assert(new_block);
2493 assert(!new_block->idstr[0]);
2494
2495 if (dev) {
2496 char *id = qdev_get_dev_path(dev);
2497 if (id) {
2498 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2499 g_free(id);
2500 }
2501 }
2502 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2503
2504 QLIST_FOREACH(block, &ram_list.blocks, next) {
2505 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2506 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2507 new_block->idstr);
2508 abort();
2509 }
2510 }
2511 }
2512
2513 static int memory_try_enable_merging(void *addr, size_t len)
2514 {
2515 QemuOpts *opts;
2516
2517 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2518 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2519 /* disabled by the user */
2520 return 0;
2521 }
2522
2523 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2524 }
2525
/* Register a new RAM block of SIZE bytes for MR and return its
 * ram_addr_t offset.  If HOST is non-NULL the caller provides (and
 * owns) the backing memory; otherwise it is allocated here via
 * hugetlbfs (-mem-path), Xen, KVM-aware or plain qemu_vmalloc. */
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr)
{
    RAMBlock *new_block;

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));

    new_block->mr = mr;
    new_block->offset = find_ram_offset(size);
    if (host) {
        new_block->host = host;
        new_block->flags |= RAM_PREALLOC_MASK;
    } else {
        if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
            new_block->host = file_ram_alloc(new_block, size, mem_path);
            if (!new_block->host) {
                /* Hugepage allocation failed: fall back to an ordinary
                 * allocation. */
                new_block->host = qemu_vmalloc(size);
                memory_try_enable_merging(new_block->host, size);
            }
#else
            fprintf(stderr, "-mem-path option unsupported\n");
            exit(1);
#endif
        } else {
            if (xen_enabled()) {
                xen_ram_alloc(new_block->offset, size, mr);
            } else if (kvm_enabled()) {
                /* some s390/kvm configurations have special constraints */
                new_block->host = kvm_vmalloc(size);
            } else {
                new_block->host = qemu_vmalloc(size);
            }
            memory_try_enable_merging(new_block->host, size);
        }
    }
    new_block->length = size;

    QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);

    /* Grow the dirty bitmap and mark the new range fully dirty. */
    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                    last_ram_offset() >> TARGET_PAGE_BITS);
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
           0, size >> TARGET_PAGE_BITS);
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);

    qemu_ram_setup_dump(new_block->host, size);
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);

    if (kvm_enabled())
        kvm_setup_guest_memory(new_block->host, size);

    return new_block->offset;
}
2581
/* Allocate a new RAM block of SIZE bytes with internally-allocated
 * backing memory; convenience wrapper for qemu_ram_alloc_from_ptr(). */
ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
{
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
}
2586
2587 void qemu_ram_free_from_ptr(ram_addr_t addr)
2588 {
2589 RAMBlock *block;
2590
2591 QLIST_FOREACH(block, &ram_list.blocks, next) {
2592 if (addr == block->offset) {
2593 QLIST_REMOVE(block, next);
2594 g_free(block);
2595 return;
2596 }
2597 }
2598 }
2599
/* Unregister the RAM block starting at ADDR and release its backing
 * memory using whichever mechanism allocated it (hugetlbfs file, Xen
 * map cache, s390/KVM mapping, or qemu_vmalloc).  Caller-provided
 * (RAM_PREALLOC_MASK) memory is left untouched. */
void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QLIST_REMOVE(block, next);
            if (block->flags & RAM_PREALLOC_MASK) {
                ; /* memory is owned by the caller */
            } else if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
                if (block->fd) {
                    /* hugetlbfs backing: drop the mapping, then the fd */
                    munmap(block->host, block->length);
                    close(block->fd);
                } else {
                    qemu_vfree(block->host);
                }
#else
                abort();
#endif
            } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
                munmap(block->host, block->length);
#else
                if (xen_enabled()) {
                    xen_invalidate_map_cache_entry(block->host);
                } else {
                    qemu_vfree(block->host);
                }
#endif
            }
            g_free(block);
            return;
        }
    }

}
2637
2638 #ifndef _WIN32
/* Remap [ADDR, ADDR+LENGTH) of guest RAM in place, e.g. to recover from
 * a hardware memory error: the old pages are unmapped and a fresh
 * mapping with matching flags is recreated at the same host virtual
 * address (MAP_FIXED). */
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->length) {
            vaddr = block->host + offset;
            if (block->flags & RAM_PREALLOC_MASK) {
                ; /* caller-owned memory: nothing we can remap */
            } else {
                flags = MAP_FIXED;
                munmap(vaddr, length);
                if (mem_path) {
#if defined(__linux__) && !defined(TARGET_S390X)
                    if (block->fd) {
                        /* Recreate the file-backed hugetlbfs mapping. */
#ifdef MAP_POPULATE
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
                            MAP_PRIVATE;
#else
                        flags |= MAP_PRIVATE;
#endif
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                    flags, block->fd, offset);
                    } else {
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                    flags, -1, 0);
                    }
#else
                    abort();
#endif
                } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
                                flags, -1, 0);
#else
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
#endif
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
            return;
        }
    }
}
2698 #endif /* !_WIN32 */
2699
/* Return a host pointer to ram allocated with qemu_ram_alloc.
   With the exception of the softmmu code in this file, this should
   only be used for local memory (e.g. video ram) that the device owns,
   and knows it isn't going to access beyond the end of the block.

   It should not be used for general purpose DMA.
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            /* Move this entry to the start of the list (MRU order):
             * consecutive lookups tend to hit the same block. */
            if (block != QLIST_FIRST(&ram_list.blocks)) {
                QLIST_REMOVE(block, next);
                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
            }
            if (xen_enabled()) {
                /* We need to check if the requested address is in the RAM
                 * because we don't want to map the entire memory in QEMU.
                 * In that case just map until the end of the page.
                 */
                if (block->offset == 0) {
                    return xen_map_cache(addr, 0, 0);
                } else if (block->host == NULL) {
                    block->host =
                        xen_map_cache(block->offset, block->length, 1);
                }
            }
            return block->host + (addr - block->offset);
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

    return NULL;
}
2740
/* Return a host pointer to ram allocated with qemu_ram_alloc.
 * Same as qemu_get_ram_ptr but avoids reordering ramblocks, so it is
 * safe to call while another thread may be iterating the block list.
 */
void *qemu_safe_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            if (xen_enabled()) {
                /* We need to check if the requested address is in the RAM
                 * because we don't want to map the entire memory in QEMU.
                 * In that case just map until the end of the page.
                 */
                if (block->offset == 0) {
                    return xen_map_cache(addr, 0, 0);
                } else if (block->host == NULL) {
                    block->host =
                        xen_map_cache(block->offset, block->length, 1);
                }
            }
            return block->host + (addr - block->offset);
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

    return NULL;
}
2771
2772 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2773 * but takes a size argument */
2774 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2775 {
2776 if (*size == 0) {
2777 return NULL;
2778 }
2779 if (xen_enabled()) {
2780 return xen_map_cache(addr, *size, 1);
2781 } else {
2782 RAMBlock *block;
2783
2784 QLIST_FOREACH(block, &ram_list.blocks, next) {
2785 if (addr - block->offset < block->length) {
2786 if (addr - block->offset + *size > block->length)
2787 *size = block->length - addr + block->offset;
2788 return block->host + (addr - block->offset);
2789 }
2790 }
2791
2792 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2793 abort();
2794 }
2795 }
2796
/* Release a pointer obtained from qemu_get_ram_ptr()/qemu_ram_ptr_length().
 * Currently this only emits a trace event; no unmapping happens here. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
2801
/* Translate a host pointer inside guest RAM back to its ram_addr_t.
 * Returns 0 on success (result stored in *ram_addr), -1 when PTR does
 * not fall inside any registered RAM block. */
int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        /* Under Xen, host pointers come from the map cache rather than
         * block->host, so ask the map cache for the translation. */
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        return 0;
    }

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        /* This case happens when the block has not been mapped. */
        if (block->host == NULL) {
            continue;
        }
        /* NOTE(review): host - block->host is a signed ptrdiff_t; for a
         * host pointer below block->host the negative difference converts
         * to a huge unsigned value, so the comparison still (correctly)
         * fails — confirm block->length is unsigned on all builds. */
        if (host - block->host < block->length) {
            *ram_addr = block->offset + (host - block->host);
            return 0;
        }
    }

    return -1;
}
2825
2826 /* Some of the softmmu routines need to translate from a host pointer
2827 (typically a TLB entry) back to a ram offset. */
2828 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2829 {
2830 ram_addr_t ram_addr;
2831
2832 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2833 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2834 abort();
2835 }
2836 return ram_addr;
2837 }
2838
/* Read handler for unassigned physical addresses: optionally logs,
 * raises a target-specific unassigned-access fault on CPUs that model
 * one, and otherwise reads as zero. */
static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
                                    unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
#endif
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
#endif
    return 0;
}
2850
/* Write handler for unassigned physical addresses: optionally logs,
 * raises a target-specific fault where modelled, and discards the data. */
static void unassigned_mem_write(void *opaque, hwaddr addr,
                                 uint64_t val, unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
#endif
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
#endif
}
2861
/* Catch-all region for physical addresses with nothing behind them:
 * reads return 0, writes are discarded. */
static const MemoryRegionOps unassigned_mem_ops = {
    .read = unassigned_mem_read,
    .write = unassigned_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2867
/* Handlers for accesses that must never be dispatched through the I/O
 * path (RAM and ROM reads are satisfied before reaching MemoryRegionOps);
 * reaching these indicates a dispatch bug, so abort loudly. */
static uint64_t error_mem_read(void *opaque, hwaddr addr,
                               unsigned size)
{
    abort();
}

static void error_mem_write(void *opaque, hwaddr addr,
                            uint64_t value, unsigned size)
{
    abort();
}

static const MemoryRegionOps error_mem_ops = {
    .read = error_mem_read,
    .write = error_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2885
/* ROM: reads are served by the fast RAM path (never arrive here), while
 * writes are silently discarded like writes to unassigned memory. */
static const MemoryRegionOps rom_mem_ops = {
    .read = error_mem_read,
    .write = unassigned_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2891
/* Write handler for RAM pages whose dirty bits are not all set (i.e.
 * pages containing translated code): invalidate affected TBs, perform
 * the store, update the dirty flags, and drop the slow-path TLB entry
 * once the page is fully dirty. */
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    int dirty_flags;
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
#if !defined(CONFIG_USER_ONLY)
        /* Flush translated code derived from this page before writing. */
        tb_invalidate_phys_page_fast(ram_addr, size);
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
#endif
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (dirty_flags == 0xff)
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
}
2923
/* Slow-path region for RAM pages containing translated code; see
 * notdirty_mem_write().  Reads never reach it. */
static const MemoryRegionOps notdirty_mem_ops = {
    .read = error_mem_read,
    .write = notdirty_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
2929
/* Generate a debug exception if a watchpoint has been hit.
 * OFFSET is the access offset within the current I/O page, LEN_MASK
 * masks the access width, FLAGS selects read/write watchpoints. */
static void check_watchpoint(int offset, int len_mask, int flags)
{
    CPUArchState *env = cpu_single_env;
    target_ulong pc, cs_base;
    TranslationBlock *tb;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (env->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        /* Match either way round: access inside the watchpoint range or
         * the watchpoint inside the access range. */
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
            wp->flags |= BP_WATCHPOINT_HIT;
            if (!env->watchpoint_hit) {
                env->watchpoint_hit = wp;
                tb = tb_find_pc(env->mem_io_pc);
                if (!tb) {
                    cpu_abort(env, "check_watchpoint: could not find TB for "
                              "pc=%p", (void *)env->mem_io_pc);
                }
                /* Re-synchronize CPU state to the faulting instruction. */
                cpu_restore_state(tb, env, env->mem_io_pc);
                tb_phys_invalidate(tb, -1);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    env->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(env);
                } else {
                    /* Retranslate a one-instruction TB and resume so the
                     * access completes before the debug exception fires. */
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(env, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}
2975
2976 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2977 so these check for a hit then pass through to the normal out-of-line
2978 phys routines. */
2979 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
2980 unsigned size)
2981 {
2982 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2983 switch (size) {
2984 case 1: return ldub_phys(addr);
2985 case 2: return lduw_phys(addr);
2986 case 4: return ldl_phys(addr);
2987 default: abort();
2988 }
2989 }
2990
2991 static void watch_mem_write(void *opaque, hwaddr addr,
2992 uint64_t val, unsigned size)
2993 {
2994 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2995 switch (size) {
2996 case 1:
2997 stb_phys(addr, val);
2998 break;
2999 case 2:
3000 stw_phys(addr, val);
3001 break;
3002 case 4:
3003 stl_phys(addr, val);
3004 break;
3005 default: abort();
3006 }
3007 }
3008
/* Region installed over pages containing watchpoints: every access runs
 * the watchpoint check first, then falls through to the normal physical
 * memory accessors. */
static const MemoryRegionOps watch_mem_ops = {
    .read = watch_mem_read,
    .write = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
3014
3015 static uint64_t subpage_read(void *opaque, hwaddr addr,
3016 unsigned len)
3017 {
3018 subpage_t *mmio = opaque;
3019 unsigned int idx = SUBPAGE_IDX(addr);
3020 MemoryRegionSection *section;
3021 #if defined(DEBUG_SUBPAGE)
3022 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3023 mmio, len, addr, idx);
3024 #endif
3025
3026 section = &phys_sections[mmio->sub_section[idx]];
3027 addr += mmio->base;
3028 addr -= section->offset_within_address_space;
3029 addr += section->offset_within_region;
3030 return io_mem_read(section->mr, addr, len);
3031 }
3032
3033 static void subpage_write(void *opaque, hwaddr addr,
3034 uint64_t value, unsigned len)
3035 {
3036 subpage_t *mmio = opaque;
3037 unsigned int idx = SUBPAGE_IDX(addr);
3038 MemoryRegionSection *section;
3039 #if defined(DEBUG_SUBPAGE)
3040 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3041 " idx %d value %"PRIx64"\n",
3042 __func__, mmio, len, addr, idx, value);
3043 #endif
3044
3045 section = &phys_sections[mmio->sub_section[idx]];
3046 addr += mmio->base;
3047 addr -= section->offset_within_address_space;
3048 addr += section->offset_within_region;
3049 io_mem_write(section->mr, addr, value, len);
3050 }
3051
/* Dispatcher for guest pages split into multiple sections; see
 * subpage_read()/subpage_write(). */
static const MemoryRegionOps subpage_ops = {
    .read = subpage_read,
    .write = subpage_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
3057
3058 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3059 unsigned size)
3060 {
3061 ram_addr_t raddr = addr;
3062 void *ptr = qemu_get_ram_ptr(raddr);
3063 switch (size) {
3064 case 1: return ldub_p(ptr);
3065 case 2: return lduw_p(ptr);
3066 case 4: return ldl_p(ptr);
3067 default: abort();
3068 }
3069 }
3070
3071 static void subpage_ram_write(void *opaque, hwaddr addr,
3072 uint64_t value, unsigned size)
3073 {
3074 ram_addr_t raddr = addr;
3075 void *ptr = qemu_get_ram_ptr(raddr);
3076 switch (size) {
3077 case 1: return stb_p(ptr, value);
3078 case 2: return stw_p(ptr, value);
3079 case 4: return stl_p(ptr, value);
3080 default: abort();
3081 }
3082 }
3083
/* Accessors for RAM reached through a subpage mapping: addresses here
 * are already ram_addr_t offsets. */
static const MemoryRegionOps subpage_ram_ops = {
    .read = subpage_ram_read,
    .write = subpage_ram_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};
3089
3090 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3091 uint16_t section)
3092 {
3093 int idx, eidx;
3094
3095 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3096 return -1;
3097 idx = SUBPAGE_IDX(start);
3098 eidx = SUBPAGE_IDX(end);
3099 #if defined(DEBUG_SUBPAGE)
3100 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3101 mmio, start, end, idx, eidx, memory);
3102 #endif
3103 if (memory_region_is_ram(phys_sections[section].mr)) {
3104 MemoryRegionSection new_section = phys_sections[section];
3105 new_section.mr = &io_mem_subpage_ram;
3106 section = phys_section_add(&new_section);
3107 }
3108 for (; idx <= eidx; idx++) {
3109 mmio->sub_section[idx] = section;
3110 }
3111
3112 return 0;
3113 }
3114
3115 static subpage_t *subpage_init(hwaddr base)
3116 {
3117 subpage_t *mmio;
3118
3119 mmio = g_malloc0(sizeof(subpage_t));
3120
3121 mmio->base = base;
3122 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3123 "subpage", TARGET_PAGE_SIZE);
3124 mmio->iomem.subpage = true;
3125 #if defined(DEBUG_SUBPAGE)
3126 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3127 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3128 #endif
3129 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3130
3131 return mmio;
3132 }
3133
3134 static uint16_t dummy_section(MemoryRegion *mr)
3135 {
3136 MemoryRegionSection section = {
3137 .mr = mr,
3138 .offset_within_address_space = 0,
3139 .offset_within_region = 0,
3140 .size = UINT64_MAX,
3141 };
3142
3143 return phys_section_add(&section);
3144 }
3145
/* Map an IOTLB entry (phys_sections index stored in the sub-page bits)
 * back to its MemoryRegion. */
MemoryRegion *iotlb_to_region(hwaddr index)
{
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
}
3150
/* Create the fixed internal MemoryRegions used by the dispatch code. */
static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
    memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
                          "unassigned", UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
                          "notdirty", UINT64_MAX);
    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
                          "subpage-ram", UINT64_MAX);
    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
                          "watch", UINT64_MAX);
}
3164
/* Per-address-space listener begin hook: reset the dispatch tree before
 * the new topology is re-added section by section via mem_add(). */
static void mem_begin(MemoryListener *listener)
{
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);

    destroy_all_mappings(d);
    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
}
3172
/* Core listener begin hook: rebuild the well-known dummy sections whose
 * indices the IOTLB encodes directly. */
static void core_begin(MemoryListener *listener)
{
    phys_sections_clear();
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
    phys_section_rom = dummy_section(&io_mem_rom);
    phys_section_watch = dummy_section(&io_mem_watch);
}
3181
3182 static void tcg_commit(MemoryListener *listener)
3183 {
3184 CPUArchState *env;
3185
3186 /* since each CPU stores ram addresses in its TLB cache, we must
3187 reset the modified entries */
3188 /* XXX: slow ! */
3189 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3190 tlb_flush(env, 1);
3191 }
3192 }
3193
/* Global dirty logging was enabled: start dirty tracking for all RAM. */
static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

/* Global dirty logging was disabled again. */
static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}
3203
/* I/O-space listener: expose a section of the I/O address space through
 * the legacy ioport interface.
 * NOTE(review): the MemoryRegionIORange allocated here appears to be
 * reclaimed by the ioport layer on unregistration — confirm against
 * ioport.c before changing ownership. */
static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);

    mrio->mr = section->mr;
    mrio->offset = section->offset_within_region;
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
                 section->offset_within_address_space, section->size);
    ioport_register(&mrio->iorange);
}
3215
/* I/O-space listener: withdraw a section from the legacy ioport table. */
static void io_region_del(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    isa_unassign_ioport(section->offset_within_address_space, section->size);
}
3221
/* Priority 1: runs before the per-address-space dispatch listener
 * (priority 0) so the dummy sections exist when sections are added. */
static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

/* Mirrors the I/O address space into the legacy ioport tables. */
static MemoryListener io_memory_listener = {
    .region_add = io_region_add,
    .region_del = io_region_del,
    .priority = 0,
};

/* TCG only needs to know when the topology settles, to flush TLBs. */
static MemoryListener tcg_memory_listener = {
    .commit = tcg_commit,
};
3238
/* Attach a dispatch structure (phys page map + rebuild listener) to AS
 * so that address_space_rw and friends can route accesses for it. */
void address_space_init_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);

    d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
    d->listener = (MemoryListener) {
        .begin = mem_begin,
        .region_add = mem_add,
        .region_nop = mem_add,
        .priority = 0,
    };
    as->dispatch = d;
    /* Registration replays the current topology through the listener. */
    memory_listener_register(&d->listener, as);
}
3253
/* Tear down the dispatch structure attached by
 * address_space_init_dispatch(): unhook the listener, free the phys
 * page map, and clear as->dispatch. */
void address_space_destroy_dispatch(AddressSpace *as)
{
    AddressSpaceDispatch *d = as->dispatch;

    memory_listener_unregister(&d->listener);
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
    g_free(d);
    as->dispatch = NULL;
}
3263
/* Create the global "system" memory and I/O address spaces and register
 * the core, ioport and TCG listeners on them. */
static void memory_map_init(void)
{
    system_memory = g_malloc(sizeof(*system_memory));
    memory_region_init(system_memory, "system", INT64_MAX);
    address_space_init(&address_space_memory, system_memory);
    address_space_memory.name = "memory";

    /* Legacy x86-style 64K I/O port space. */
    system_io = g_malloc(sizeof(*system_io));
    memory_region_init(system_io, "io", 65536);
    address_space_init(&address_space_io, system_io);
    address_space_io.name = "I/O";

    memory_listener_register(&core_memory_listener, &address_space_memory);
    memory_listener_register(&io_memory_listener, &address_space_io);
    memory_listener_register(&tcg_memory_listener, &address_space_memory);
}
3280
/* Accessor for the root MemoryRegion of the system address space. */
MemoryRegion *get_system_memory(void)
{
    return system_memory;
}

/* Accessor for the root MemoryRegion of the I/O address space. */
MemoryRegion *get_system_io(void)
{
    return system_io;
}
3290
3291 #endif /* !defined(CONFIG_USER_ONLY) */
3292
3293 /* physical memory access (slow version, mainly for debug) */
3294 #if defined(CONFIG_USER_ONLY)
/* User-mode debug access to guest memory: copy LEN bytes between BUF
 * and guest address ADDR, page by page, honouring page protection.
 * Returns 0 on success, -1 on an invalid or protected page. */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l, flags;
    target_ulong page;
    void * p;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        /* Bytes remaining in this guest page. */
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        flags = page_get_flags(page);
        if (!(flags & PAGE_VALID))
            return -1;
        if (is_write) {
            if (!(flags & PAGE_WRITE))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
                return -1;
            memcpy(p, buf, l);
            unlock_user(p, addr, l);
        } else {
            if (!(flags & PAGE_READ))
                return -1;
            /* XXX: this code should not depend on lock_user */
            if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
                return -1;
            memcpy(buf, p, l);
            unlock_user(p, addr, 0);
        }
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
3333
3334 #else
3335
3336 static void invalidate_and_set_dirty(hwaddr addr,
3337 hwaddr length)
3338 {
3339 if (!cpu_physical_memory_is_dirty(addr)) {
3340 /* invalidate code */
3341 tb_invalidate_phys_page_range(addr, addr + length, 0);
3342 /* set dirty bit */
3343 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3344 }
3345 xen_modified_memory(addr, length);
3346 }
3347
/* Slow-path read/write of guest physical memory through an address
 * space.  The transfer is split at page granularity; RAM pages are
 * accessed with memcpy while MMIO regions are dispatched to their
 * read/write callbacks in 4/2/1-byte units chosen from the remaining
 * length and the alignment of the device-relative address. */
void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
                      int len, bool is_write)
{
    AddressSpaceDispatch *d = as->dispatch;
    int l;
    uint8_t *ptr;
    uint32_t val;
    hwaddr page;
    MemoryRegionSection *section;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        /* Clamp the chunk to the end of the current page. */
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(d, page >> TARGET_PAGE_BITS);

        if (is_write) {
            if (!memory_region_is_ram(section->mr)) {
                hwaddr addr1;
                addr1 = memory_region_section_addr(section, addr);
                /* XXX: could force cpu_single_env to NULL to avoid
                   potential bugs */
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit write access */
                    val = ldl_p(buf);
                    io_mem_write(section->mr, addr1, val, 4);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit write access */
                    val = lduw_p(buf);
                    io_mem_write(section->mr, addr1, val, 2);
                    l = 2;
                } else {
                    /* 8 bit write access */
                    val = ldub_p(buf);
                    io_mem_write(section->mr, addr1, val, 1);
                    l = 1;
                }
            } else if (!section->readonly) {
                ram_addr_t addr1;
                addr1 = memory_region_get_ram_addr(section->mr)
                    + memory_region_section_addr(section, addr);
                /* RAM case */
                ptr = qemu_get_ram_ptr(addr1);
                memcpy(ptr, buf, l);
                /* Writing RAM may have clobbered translated code. */
                invalidate_and_set_dirty(addr1, l);
                qemu_put_ram_ptr(ptr);
            }
            /* NOTE(review): writes to read-only RAM fall through and are
               silently discarded here -- presumably intended ROM
               semantics; confirm against callers. */
        } else {
            if (!(memory_region_is_ram(section->mr) ||
                  memory_region_is_romd(section->mr))) {
                hwaddr addr1;
                /* I/O case */
                addr1 = memory_region_section_addr(section, addr);
                if (l >= 4 && ((addr1 & 3) == 0)) {
                    /* 32 bit read access */
                    val = io_mem_read(section->mr, addr1, 4);
                    stl_p(buf, val);
                    l = 4;
                } else if (l >= 2 && ((addr1 & 1) == 0)) {
                    /* 16 bit read access */
                    val = io_mem_read(section->mr, addr1, 2);
                    stw_p(buf, val);
                    l = 2;
                } else {
                    /* 8 bit read access */
                    val = io_mem_read(section->mr, addr1, 1);
                    stb_p(buf, val);
                    l = 1;
                }
            } else {
                /* RAM case */
                ptr = qemu_get_ram_ptr(section->mr->ram_addr
                                       + memory_region_section_addr(section,
                                                                    addr));
                memcpy(buf, ptr, l);
                qemu_put_ram_ptr(ptr);
            }
        }
        len -= l;
        buf += l;
        addr += l;
    }
}
3433
3434 void address_space_write(AddressSpace *as, hwaddr addr,
3435 const uint8_t *buf, int len)
3436 {
3437 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3438 }
3439
3440 /**
3441 * address_space_read: read from an address space.
3442 *
3443 * @as: #AddressSpace to be accessed
3444 * @addr: address within that address space
3445 * @buf: buffer with the data transferred
3446 */
3447 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3448 {
3449 address_space_rw(as, addr, buf, len, false);
3450 }
3451
3452
3453 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3454 int len, int is_write)
3455 {
3456 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
3457 }
3458
3459 /* used for ROM loading : can write in RAM and ROM */
3460 void cpu_physical_memory_write_rom(hwaddr addr,
3461 const uint8_t *buf, int len)
3462 {
3463 AddressSpaceDispatch *d = address_space_memory.dispatch;
3464 int l;
3465 uint8_t *ptr;
3466 hwaddr page;
3467 MemoryRegionSection *section;
3468
3469 while (len > 0) {
3470 page = addr & TARGET_PAGE_MASK;
3471 l = (page + TARGET_PAGE_SIZE) - addr;
3472 if (l > len)
3473 l = len;
3474 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3475
3476 if (!(memory_region_is_ram(section->mr) ||
3477 memory_region_is_romd(section->mr))) {
3478 /* do nothing */
3479 } else {
3480 unsigned long addr1;
3481 addr1 = memory_region_get_ram_addr(section->mr)
3482 + memory_region_section_addr(section, addr);
3483 /* ROM/RAM case */
3484 ptr = qemu_get_ram_ptr(addr1);
3485 memcpy(ptr, buf, l);
3486 invalidate_and_set_dirty(addr1, l);
3487 qemu_put_ram_ptr(ptr);
3488 }
3489 len -= l;
3490 buf += l;
3491 addr += l;
3492 }
3493 }
3494
/* Scratch buffer used by address_space_map() when the requested range
 * is not directly mappable RAM.  There is a single, statically
 * allocated bounce buffer; bounce.buffer == NULL means it is free. */
typedef struct {
    void *buffer;   /* host allocation, or NULL while unused */
    hwaddr addr;    /* guest physical address being bounced */
    hwaddr len;     /* length of the bounced region */
} BounceBuffer;

static BounceBuffer bounce;
3502
/* Callers whose address_space_map() attempt failed (bounce buffer in
 * use) may register a callback to retry when it is released; see
 * cpu_notify_map_clients(). */
typedef struct MapClient {
    void *opaque;                    /* argument handed to callback */
    void (*callback)(void *opaque);  /* run when mapping may succeed */
    QLIST_ENTRY(MapClient) link;
} MapClient;

static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);
3511
3512 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3513 {
3514 MapClient *client = g_malloc(sizeof(*client));
3515
3516 client->opaque = opaque;
3517 client->callback = callback;
3518 QLIST_INSERT_HEAD(&map_client_list, client, link);
3519 return client;
3520 }
3521
3522 void cpu_unregister_map_client(void *_client)
3523 {
3524 MapClient *client = (MapClient *)_client;
3525
3526 QLIST_REMOVE(client, link);
3527 g_free(client);
3528 }
3529
3530 static void cpu_notify_map_clients(void)
3531 {
3532 MapClient *client;
3533
3534 while (!QLIST_EMPTY(&map_client_list)) {
3535 client = QLIST_FIRST(&map_client_list);
3536 client->callback(client->opaque);
3537 cpu_unregister_map_client(client);
3538 }
3539 }
3540
/* Map a physical memory region into a host virtual address.
 * May map a subset of the requested range, given by and returned in *plen.
 * May return NULL if resources needed to perform the mapping are exhausted.
 * Use only for reads OR writes - not for read-modify-write operations.
 * Use cpu_register_map_client() to know when retrying the map operation is
 * likely to succeed.
 */
void *address_space_map(AddressSpace *as,
                        hwaddr addr,
                        hwaddr *plen,
                        bool is_write)
{
    AddressSpaceDispatch *d = as->dispatch;
    hwaddr len = *plen;
    hwaddr todo = 0;    /* bytes of contiguous RAM accumulated so far */
    int l;
    hwaddr page;
    MemoryRegionSection *section;
    ram_addr_t raddr = RAM_ADDR_MAX;
    ram_addr_t rlen;
    void *ret;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        section = phys_page_find(d, page >> TARGET_PAGE_BITS);

        /* Pages that are not writable RAM cannot be mapped directly;
         * fall back to the single page-sized bounce buffer. */
        if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
            if (todo || bounce.buffer) {
                /* Some RAM was already mapped, or the bounce buffer is
                 * busy: return the shorter direct mapping instead. */
                break;
            }
            bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
            bounce.addr = addr;
            bounce.len = l;
            if (!is_write) {
                /* Pre-fill the bounce buffer for read mappings. */
                address_space_read(as, addr, bounce.buffer, l);
            }

            *plen = l;
            return bounce.buffer;
        }
        if (!todo) {
            /* First RAM page: remember where the mapping starts. */
            raddr = memory_region_get_ram_addr(section->mr)
                + memory_region_section_addr(section, addr);
        }

        len -= l;
        addr += l;
        todo += l;
    }
    /* qemu_ram_ptr_length() may shorten the mapping further; report the
     * final length back through *plen. */
    rlen = todo;
    ret = qemu_ram_ptr_length(raddr, &rlen);
    *plen = rlen;
    return ret;
}
3598
/* Unmaps a memory region previously mapped by address_space_map().
 * Will also mark the memory as dirty if is_write == 1. access_len gives
 * the amount of memory that was actually read or written by the caller.
 */
void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                         int is_write, hwaddr access_len)
{
    if (buffer != bounce.buffer) {
        /* Direct RAM mapping: dirty-track the written pages and drop
         * any Xen map-cache entry for the host pointer. */
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                invalidate_and_set_dirty(addr1, l);
                addr1 += l;
                access_len -= l;
            }
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    /* Bounce buffer: write the data back to guest memory, release the
     * buffer, and wake up anyone waiting to map. */
    if (is_write) {
        address_space_write(as, bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}
3631
/* Legacy wrapper: map guest physical memory from the system address
 * space into host virtual memory.  See address_space_map(). */
void *cpu_physical_memory_map(hwaddr addr,
                              hwaddr *plen,
                              int is_write)
{
    return address_space_map(&address_space_memory, addr, plen, is_write);
}
3638
3639 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3640 int is_write, hwaddr access_len)
3641 {
3642 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3643 }
3644
/* Load a 32-bit value from guest physical memory with the requested
 * endianness.  warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint32_t val;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 4);
        /* Swap only when the requested endianness differs from the
         * target's compile-time byte order. */
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}
3688
/* Load a target-native-endian 32-bit value from guest physical memory. */
uint32_t ldl_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}
3693
/* Load a little-endian 32-bit value from guest physical memory. */
uint32_t ldl_le_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}
3698
/* Load a big-endian 32-bit value from guest physical memory. */
uint32_t ldl_be_phys(hwaddr addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
3703
/* Load a 64-bit value from guest physical memory with the requested
 * endianness.  I/O regions are accessed as two 4-byte reads.
 * warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(hwaddr addr,
                                         enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);

        /* XXX This is broken when device endian != cpu endian.
           Fix and add "endian" variable check */
#ifdef TARGET_WORDS_BIGENDIAN
        val = io_mem_read(section->mr, addr, 4) << 32;
        val |= io_mem_read(section->mr, addr + 4, 4);
#else
        val = io_mem_read(section->mr, addr, 4);
        val |= io_mem_read(section->mr, addr + 4, 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}
3747
/* Load a target-native-endian 64-bit value from guest physical memory. */
uint64_t ldq_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}
3752
/* Load a little-endian 64-bit value from guest physical memory. */
uint64_t ldq_le_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}
3757
/* Load a big-endian 64-bit value from guest physical memory. */
uint64_t ldq_be_phys(hwaddr addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
3762
3763 /* XXX: optimize */
3764 uint32_t ldub_phys(hwaddr addr)
3765 {
3766 uint8_t val;
3767 cpu_physical_memory_read(addr, &val, 1);
3768 return val;
3769 }
3770
/* Load a 16-bit value from guest physical memory with the requested
 * endianness.  warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(hwaddr addr,
                                          enum device_endian endian)
{
    uint8_t *ptr;
    uint64_t val;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!(memory_region_is_ram(section->mr) ||
          memory_region_is_romd(section->mr))) {
        /* I/O case */
        addr = memory_region_section_addr(section, addr);
        val = io_mem_read(section->mr, addr, 2);
        /* Swap only when the requested endianness differs from the
         * target's compile-time byte order. */
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}
3814
/* Load a target-native-endian 16-bit value from guest physical memory. */
uint32_t lduw_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}
3819
/* Load a little-endian 16-bit value from guest physical memory. */
uint32_t lduw_le_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}
3824
/* Load a big-endian 16-bit value from guest physical memory. */
uint32_t lduw_be_phys(hwaddr addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
3829
3830 /* warning: addr must be aligned. The ram page is not masked as dirty
3831 and the code inside is not invalidated. It is useful if the dirty
3832 bits are used to track modified PTEs */
3833 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3834 {
3835 uint8_t *ptr;
3836 MemoryRegionSection *section;
3837
3838 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3839
3840 if (!memory_region_is_ram(section->mr) || section->readonly) {
3841 addr = memory_region_section_addr(section, addr);
3842 if (memory_region_is_ram(section->mr)) {
3843 section = &phys_sections[phys_section_rom];
3844 }
3845 io_mem_write(section->mr, addr, val, 4);
3846 } else {
3847 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3848 & TARGET_PAGE_MASK)
3849 + memory_region_section_addr(section, addr);
3850 ptr = qemu_get_ram_ptr(addr1);
3851 stl_p(ptr, val);
3852
3853 if (unlikely(in_migration)) {
3854 if (!cpu_physical_memory_is_dirty(addr1)) {
3855 /* invalidate code */
3856 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3857 /* set dirty bit */
3858 cpu_physical_memory_set_dirty_flags(
3859 addr1, (0xff & ~CODE_DIRTY_FLAG));
3860 }
3861 }
3862 }
3863 }
3864
/* 64-bit variant of stl_phys_notdirty(): store without dirty tracking
 * or TB invalidation.  I/O regions take two 4-byte writes.
 * warning: addr must be aligned. */
void stq_phys_notdirty(hwaddr addr, uint64_t val)
{
    uint8_t *ptr;
    MemoryRegionSection *section;

    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);

    if (!memory_region_is_ram(section->mr) || section->readonly) {
        addr = memory_region_section_addr(section, addr);
        if (memory_region_is_ram(section->mr)) {
            /* Read-only RAM: redirect the store to the ROM handler. */
            section = &phys_sections[phys_section_rom];
        }
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write(section->mr, addr, val >> 32, 4);
        io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
#else
        io_mem_write(section->mr, addr, (uint32_t)val, 4);
        io_mem_write(section->mr, addr + 4, val >> 32, 4);
#endif
    } else {
        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
                                & TARGET_PAGE_MASK)
                               + memory_region_section_addr(section, addr));
        stq_p(ptr, val);
    }
}
3891
3892 /* warning: addr must be aligned */
3893 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3894 enum device_endian endian)
3895 {
3896 uint8_t *ptr;
3897 MemoryRegionSection *section;
3898
3899 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3900
3901 if (!memory_region_is_ram(section->mr) || section->readonly) {
3902 addr = memory_region_section_addr(section, addr);
3903 if (memory_region_is_ram(section->mr)) {
3904 section = &phys_sections[phys_section_rom];
3905 }
3906 #if defined(TARGET_WORDS_BIGENDIAN)
3907 if (endian == DEVICE_LITTLE_ENDIAN) {
3908 val = bswap32(val);
3909 }
3910 #else
3911 if (endian == DEVICE_BIG_ENDIAN) {
3912 val = bswap32(val);
3913 }
3914 #endif
3915 io_mem_write(section->mr, addr, val, 4);
3916 } else {
3917 unsigned long addr1;
3918 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3919 + memory_region_section_addr(section, addr);
3920 /* RAM case */
3921 ptr = qemu_get_ram_ptr(addr1);
3922 switch (endian) {
3923 case DEVICE_LITTLE_ENDIAN:
3924 stl_le_p(ptr, val);
3925 break;
3926 case DEVICE_BIG_ENDIAN:
3927 stl_be_p(ptr, val);
3928 break;
3929 default:
3930 stl_p(ptr, val);
3931 break;
3932 }
3933 invalidate_and_set_dirty(addr1, 4);
3934 }
3935 }
3936
/* Store a target-native-endian 32-bit value to guest physical memory. */
void stl_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}
3941
/* Store a little-endian 32-bit value to guest physical memory. */
void stl_le_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}
3946
/* Store a big-endian 32-bit value to guest physical memory. */
void stl_be_phys(hwaddr addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
3951
3952 /* XXX: optimize */
3953 void stb_phys(hwaddr addr, uint32_t val)
3954 {
3955 uint8_t v = val;
3956 cpu_physical_memory_write(addr, &v, 1);
3957 }
3958
3959 /* warning: addr must be aligned */
3960 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3961 enum device_endian endian)
3962 {
3963 uint8_t *ptr;
3964 MemoryRegionSection *section;
3965
3966 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3967
3968 if (!memory_region_is_ram(section->mr) || section->readonly) {
3969 addr = memory_region_section_addr(section, addr);
3970 if (memory_region_is_ram(section->mr)) {
3971 section = &phys_sections[phys_section_rom];
3972 }
3973 #if defined(TARGET_WORDS_BIGENDIAN)
3974 if (endian == DEVICE_LITTLE_ENDIAN) {
3975 val = bswap16(val);
3976 }
3977 #else
3978 if (endian == DEVICE_BIG_ENDIAN) {
3979 val = bswap16(val);
3980 }
3981 #endif
3982 io_mem_write(section->mr, addr, val, 2);
3983 } else {
3984 unsigned long addr1;
3985 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3986 + memory_region_section_addr(section, addr);
3987 /* RAM case */
3988 ptr = qemu_get_ram_ptr(addr1);
3989 switch (endian) {
3990 case DEVICE_LITTLE_ENDIAN:
3991 stw_le_p(ptr, val);
3992 break;
3993 case DEVICE_BIG_ENDIAN:
3994 stw_be_p(ptr, val);
3995 break;
3996 default:
3997 stw_p(ptr, val);
3998 break;
3999 }
4000 invalidate_and_set_dirty(addr1, 2);
4001 }
4002 }
4003
/* Store a target-native-endian 16-bit value to guest physical memory. */
void stw_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}
4008
/* Store a little-endian 16-bit value to guest physical memory. */
void stw_le_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}
4013
/* Store a big-endian 16-bit value to guest physical memory. */
void stw_be_phys(hwaddr addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
4018
/* XXX: optimize */
/* Store a target-native-endian 64-bit value to guest physical memory:
 * the value is swapped to target order in a local and written through
 * the generic byte-wise path. */
void stq_phys(hwaddr addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
4025
/* Store a little-endian 64-bit value to guest physical memory. */
void stq_le_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
4031
/* Store a big-endian 64-bit value to guest physical memory. */
void stq_be_phys(hwaddr addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
4037
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    hwaddr phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        /* Translate the guest-virtual page via the CPU's debug walker. */
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            /* ROM-capable writer so debuggers can patch ROM too. */
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
4066 #endif
4067
/* in deterministic execution mode, instructions doing device I/Os
   must be at the end of the TB.
   Called from an MMIO access in icount mode: retranslate the current
   TB so it ends exactly on the I/O instruction, then restart
   execution.  Does not return (ends via cpu_resume_from_signal). */
void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc(retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    /* Roll CPU state back to the start of the faulting instruction. */
    cpu_restore_state(tb, env, retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred. */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn. */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB. If this is not
       the first instruction in a TB then re-execute the preceding
       branch. */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
        && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen. */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    /* CF_LAST_IO makes the regenerated TB stop right after the I/O. */
    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception. In practice
       we have already translated the block once so it's probably ok. */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB. */
    cpu_resume_from_signal(env, NULL);
}
4126
4127 #if !defined(CONFIG_USER_ONLY)
4128
/* Print translation-buffer statistics (TB counts, sizes, jump-chaining
 * and flush counters) to @f via @cpu_fprintf; used by the monitor. */
void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    /* Walk every live TB and accumulate the statistics. */
    for(i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size %td/%zd\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
                cross_page,
                nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
}
4180
/*
 * A helper function for the _utterly broken_ virtio device model to find
 * out whether the target it was compiled for is big endian.  Don't do
 * this at home kids: device code should not depend on target byte order.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#ifdef TARGET_WORDS_BIGENDIAN
    return true;
#else
    return false;
#endif
}
4194
4195 #endif
4196
4197 #ifndef CONFIG_USER_ONLY
4198 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4199 {
4200 MemoryRegionSection *section;
4201
4202 section = phys_page_find(address_space_memory.dispatch,
4203 phys_addr >> TARGET_PAGE_BITS);
4204
4205 return !(memory_region_is_ram(section->mr) ||
4206 memory_region_is_romd(section->mr));
4207 }
4208 #endif