[qemu.git] / exec.c
1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
59
60 #include "cputlb.h"
61
62 #include "memory-internal.h"
63
64 //#define DEBUG_TB_INVALIDATE
65 //#define DEBUG_FLUSH
66 //#define DEBUG_UNASSIGNED
67
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
73
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
78
79 #define SMC_BITMAP_USE_THRESHOLD 10
80
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
87
88 uint8_t *code_gen_prologue;
89 static uint8_t *code_gen_buffer;
90 static size_t code_gen_buffer_size;
91 /* threshold to flush the translated code buffer */
92 static size_t code_gen_buffer_max_size;
93 static uint8_t *code_gen_ptr;
94
95 #if !defined(CONFIG_USER_ONLY)
96 int phys_ram_fd;
97 static int in_migration;
98
99 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
100
101 static MemoryRegion *system_memory;
102 static MemoryRegion *system_io;
103
104 AddressSpace address_space_io;
105 AddressSpace address_space_memory;
106
107 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
108 static MemoryRegion io_mem_subpage_ram;
109
110 #endif
111
112 CPUArchState *first_cpu;
113 /* current CPU in the current thread. It is only valid inside
114 cpu_exec() */
115 DEFINE_TLS(CPUArchState *,cpu_single_env);
116 /* 0 = Do not count executed instructions.
117 1 = Precise instruction counting.
118 2 = Adaptive rate instruction counting. */
119 int use_icount = 0;
120
121 typedef struct PageDesc {
122 /* list of TBs intersecting this ram page */
123 TranslationBlock *first_tb;
124 /* in order to optimize self-modifying code, we count the writes that
125 invalidate code in a given page and switch to a bitmap past a threshold */
126 unsigned int code_write_count;
127 uint8_t *code_bitmap;
128 #if defined(CONFIG_USER_ONLY)
129 unsigned long flags;
130 #endif
131 } PageDesc;
132
133 /* In system mode we want L1_MAP to be based on ram offsets,
134 while in user mode we want it to be based on virtual addresses. */
135 #if !defined(CONFIG_USER_ONLY)
136 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
137 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
138 #else
139 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
140 #endif
141 #else
142 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
143 #endif
144
145 /* Size of the L2 (and L3, etc) page tables. */
146 #define L2_BITS 10
147 #define L2_SIZE (1 << L2_BITS)
148
149 #define P_L2_LEVELS \
150 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
151
152 /* The bits remaining after N lower levels of page tables. */
153 #define V_L1_BITS_REM \
154 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
155
156 #if V_L1_BITS_REM < 4
157 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
158 #else
159 #define V_L1_BITS V_L1_BITS_REM
160 #endif
161
162 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
163
164 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
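/* Worked example for the macros above (for illustration only, assume
   L1_MAP_ADDR_SPACE_BITS = 48 and TARGET_PAGE_BITS = 12): 48 - 12 = 36 bits
   must be resolved by the map; 36 % 10 = 6 and 6 >= 4, so V_L1_BITS = 6 and
   the top level holds V_L1_SIZE = 64 pointers.  V_L1_SHIFT = 36 - 6 = 30,
   leaving V_L1_SHIFT / L2_BITS = 3 lower levels of L2_SIZE = 1024 entries
   each: 6 + 3 * 10 + 12 = 48 bits, covered exactly. */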
165
166 uintptr_t qemu_real_host_page_size;
167 uintptr_t qemu_host_page_size;
168 uintptr_t qemu_host_page_mask;
169
170 /* This is a multi-level map on the virtual address space.
171 The bottom level has pointers to PageDesc. */
172 static void *l1_map[V_L1_SIZE];
173
174 #if !defined(CONFIG_USER_ONLY)
175
176 static MemoryRegionSection *phys_sections;
177 static unsigned phys_sections_nb, phys_sections_nb_alloc;
178 static uint16_t phys_section_unassigned;
179 static uint16_t phys_section_notdirty;
180 static uint16_t phys_section_rom;
181 static uint16_t phys_section_watch;
182
183 /* Simple allocator for PhysPageEntry nodes */
184 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
185 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
186
187 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
188
189 static void io_mem_init(void);
190 static void memory_map_init(void);
191
192 static MemoryRegion io_mem_watch;
193 #endif
194
195 /* statistics */
196 static int tb_flush_count;
197 static int tb_phys_invalidate_count;
198
199 #ifdef _WIN32
200 static inline void map_exec(void *addr, long size)
201 {
202 DWORD old_protect;
203 VirtualProtect(addr, size,
204 PAGE_EXECUTE_READWRITE, &old_protect);
205
206 }
207 #else
208 static inline void map_exec(void *addr, long size)
209 {
210 unsigned long start, end, page_size;
211
212 page_size = getpagesize();
213 start = (unsigned long)addr;
214 start &= ~(page_size - 1);
215
216 end = (unsigned long)addr + size;
217 end += page_size - 1;
218 end &= ~(page_size - 1);
219
220 mprotect((void *)start, end - start,
221 PROT_READ | PROT_WRITE | PROT_EXEC);
222 }
223 #endif
224
225 static void page_init(void)
226 {
227 /* NOTE: we can always suppose that qemu_host_page_size >=
228 TARGET_PAGE_SIZE */
229 #ifdef _WIN32
230 {
231 SYSTEM_INFO system_info;
232
233 GetSystemInfo(&system_info);
234 qemu_real_host_page_size = system_info.dwPageSize;
235 }
236 #else
237 qemu_real_host_page_size = getpagesize();
238 #endif
239 if (qemu_host_page_size == 0)
240 qemu_host_page_size = qemu_real_host_page_size;
241 if (qemu_host_page_size < TARGET_PAGE_SIZE)
242 qemu_host_page_size = TARGET_PAGE_SIZE;
243 qemu_host_page_mask = ~(qemu_host_page_size - 1);
244
245 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
246 {
247 #ifdef HAVE_KINFO_GETVMMAP
248 struct kinfo_vmentry *freep;
249 int i, cnt;
250
251 freep = kinfo_getvmmap(getpid(), &cnt);
252 if (freep) {
253 mmap_lock();
254 for (i = 0; i < cnt; i++) {
255 unsigned long startaddr, endaddr;
256
257 startaddr = freep[i].kve_start;
258 endaddr = freep[i].kve_end;
259 if (h2g_valid(startaddr)) {
260 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
261
262 if (h2g_valid(endaddr)) {
263 endaddr = h2g(endaddr);
264 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
265 } else {
266 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
267 endaddr = ~0ul;
268 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
269 #endif
270 }
271 }
272 }
273 free(freep);
274 mmap_unlock();
275 }
276 #else
277 FILE *f;
278
279 last_brk = (unsigned long)sbrk(0);
280
281 f = fopen("/compat/linux/proc/self/maps", "r");
282 if (f) {
283 mmap_lock();
284
285 do {
286 unsigned long startaddr, endaddr;
287 int n;
288
289 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
290
291 if (n == 2 && h2g_valid(startaddr)) {
292 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
293
294 if (h2g_valid(endaddr)) {
295 endaddr = h2g(endaddr);
296 } else {
297 endaddr = ~0ul;
298 }
299 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
300 }
301 } while (!feof(f));
302
303 fclose(f);
304 mmap_unlock();
305 }
306 #endif
307 }
308 #endif
309 }
310
311 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
312 {
313 PageDesc *pd;
314 void **lp;
315 int i;
316
317 #if defined(CONFIG_USER_ONLY)
318 /* We can't use g_malloc because it may recurse into a locked mutex. */
319 # define ALLOC(P, SIZE) \
320 do { \
321 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
322 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
323 } while (0)
324 #else
325 # define ALLOC(P, SIZE) \
326 do { P = g_malloc0(SIZE); } while (0)
327 #endif
328
329 /* Level 1. Always allocated. */
330 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
331
332 /* Level 2..N-1. */
333 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
334 void **p = *lp;
335
336 if (p == NULL) {
337 if (!alloc) {
338 return NULL;
339 }
340 ALLOC(p, sizeof(void *) * L2_SIZE);
341 *lp = p;
342 }
343
344 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
345 }
346
347 pd = *lp;
348 if (pd == NULL) {
349 if (!alloc) {
350 return NULL;
351 }
352 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
353 *lp = pd;
354 }
355
356 #undef ALLOC
357
358 return pd + (index & (L2_SIZE - 1));
359 }
360
361 static inline PageDesc *page_find(tb_page_addr_t index)
362 {
363 return page_find_alloc(index, 0);
364 }
365
366 #if !defined(CONFIG_USER_ONLY)
367
368 static void phys_map_node_reserve(unsigned nodes)
369 {
370 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
371 typedef PhysPageEntry Node[L2_SIZE];
372 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
373 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
374 phys_map_nodes_nb + nodes);
375 phys_map_nodes = g_renew(Node, phys_map_nodes,
376 phys_map_nodes_nb_alloc);
377 }
378 }
379
380 static uint16_t phys_map_node_alloc(void)
381 {
382 unsigned i;
383 uint16_t ret;
384
385 ret = phys_map_nodes_nb++;
386 assert(ret != PHYS_MAP_NODE_NIL);
387 assert(ret != phys_map_nodes_nb_alloc);
388 for (i = 0; i < L2_SIZE; ++i) {
389 phys_map_nodes[ret][i].is_leaf = 0;
390 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
391 }
392 return ret;
393 }
394
395 static void phys_map_nodes_reset(void)
396 {
397 phys_map_nodes_nb = 0;
398 }
399
400
401 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
402 hwaddr *nb, uint16_t leaf,
403 int level)
404 {
405 PhysPageEntry *p;
406 int i;
407 hwaddr step = (hwaddr)1 << (level * L2_BITS);
408
409 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
410 lp->ptr = phys_map_node_alloc();
411 p = phys_map_nodes[lp->ptr];
412 if (level == 0) {
413 for (i = 0; i < L2_SIZE; i++) {
414 p[i].is_leaf = 1;
415 p[i].ptr = phys_section_unassigned;
416 }
417 }
418 } else {
419 p = phys_map_nodes[lp->ptr];
420 }
421 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
422
423 while (*nb && lp < &p[L2_SIZE]) {
424 if ((*index & (step - 1)) == 0 && *nb >= step) {
425 lp->is_leaf = true;
426 lp->ptr = leaf;
427 *index += step;
428 *nb -= step;
429 } else {
430 phys_page_set_level(lp, index, nb, leaf, level - 1);
431 }
432 ++lp;
433 }
434 }
435
436 static void phys_page_set(AddressSpaceDispatch *d,
437 hwaddr index, hwaddr nb,
438 uint16_t leaf)
439 {
440 /* Wildly overreserve - it doesn't matter much. */
441 phys_map_node_reserve(3 * P_L2_LEVELS);
442
443 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
444 }
445
446 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
447 {
448 PhysPageEntry lp = d->phys_map;
449 PhysPageEntry *p;
450 int i;
451 uint16_t s_index = phys_section_unassigned;
452
453 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
454 if (lp.ptr == PHYS_MAP_NODE_NIL) {
455 goto not_found;
456 }
457 p = phys_map_nodes[lp.ptr];
458 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
459 }
460
461 s_index = lp.ptr;
462 not_found:
463 return &phys_sections[s_index];
464 }
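/* Note on phys_page_set()/phys_page_find() above: the physical page map is a
   small radix tree with P_L2_LEVELS levels of L2_SIZE entries.  Interior
   nodes are allocated from the phys_map_nodes[] pool and referenced by
   16-bit indices; leaves hold an index into phys_sections[].  A leaf placed
   at a higher level covers a whole aligned power-of-two run of pages, and a
   lookup that walks off the populated tree yields phys_section_unassigned. */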
465
466 bool memory_region_is_unassigned(MemoryRegion *mr)
467 {
468 return mr != &io_mem_ram && mr != &io_mem_rom
469 && mr != &io_mem_notdirty && !mr->rom_device
470 && mr != &io_mem_watch;
471 }
472
473 #define mmap_lock() do { } while(0)
474 #define mmap_unlock() do { } while(0)
475 #endif
476
477 #if defined(CONFIG_USER_ONLY)
478 /* Currently it is not recommended to allocate big chunks of data in
479 user mode. This will change when a dedicated libc is used. */
480 /* ??? 64-bit hosts ought to have no problem mmapping data outside the
481 region in which the guest needs to run. Revisit this. */
482 #define USE_STATIC_CODE_GEN_BUFFER
483 #endif
484
485 /* ??? Should configure for this, not list operating systems here. */
486 #if (defined(__linux__) \
487 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
488 || defined(__DragonFly__) || defined(__OpenBSD__) \
489 || defined(__NetBSD__))
490 # define USE_MMAP
491 #endif
492
493 /* Minimum size of the code gen buffer. This number is arbitrarily chosen,
494 but not so small that we can't have a fair number of TBs live. */
495 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
496
497 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
498 indicated, this is constrained by the range of direct branches on the
499 host cpu, as used by the TCG implementation of goto_tb. */
500 #if defined(__x86_64__)
501 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
502 #elif defined(__sparc__)
503 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
504 #elif defined(__arm__)
505 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
506 #elif defined(__s390x__)
507 /* We have a +- 4GB range on the branches; leave some slop. */
508 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
509 #else
510 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
511 #endif
512
513 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
514
515 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
516 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
517 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
518
519 static inline size_t size_code_gen_buffer(size_t tb_size)
520 {
521 /* Size the buffer. */
522 if (tb_size == 0) {
523 #ifdef USE_STATIC_CODE_GEN_BUFFER
524 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
525 #else
526 /* ??? Needs adjustments. */
527 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
528 static buffer, we could size this on RESERVED_VA, on the text
529 segment size of the executable, or continue to use the default. */
530 tb_size = (unsigned long)(ram_size / 4);
531 #endif
532 }
533 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
534 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
535 }
536 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
537 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
538 }
539 code_gen_buffer_size = tb_size;
540 return tb_size;
541 }
542
543 #ifdef USE_STATIC_CODE_GEN_BUFFER
544 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
545 __attribute__((aligned(CODE_GEN_ALIGN)));
546
547 static inline void *alloc_code_gen_buffer(void)
548 {
549 map_exec(static_code_gen_buffer, code_gen_buffer_size);
550 return static_code_gen_buffer;
551 }
552 #elif defined(USE_MMAP)
553 static inline void *alloc_code_gen_buffer(void)
554 {
555 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
556 uintptr_t start = 0;
557 void *buf;
558
559 /* Constrain the position of the buffer based on the host cpu.
560 Note that these addresses are chosen in concert with the
561 addresses assigned in the relevant linker script file. */
562 # if defined(__PIE__) || defined(__PIC__)
563 /* Don't bother setting a preferred location if we're building
564 a position-independent executable. We're more likely to get
565 an address near the main executable if we let the kernel
566 choose the address. */
567 # elif defined(__x86_64__) && defined(MAP_32BIT)
568 /* Force the memory down into low memory with the executable.
569 Leave the choice of exact location with the kernel. */
570 flags |= MAP_32BIT;
571 /* Cannot expect to map more than 800MB in low memory. */
572 if (code_gen_buffer_size > 800u * 1024 * 1024) {
573 code_gen_buffer_size = 800u * 1024 * 1024;
574 }
575 # elif defined(__sparc__)
576 start = 0x40000000ul;
577 # elif defined(__s390x__)
578 start = 0x90000000ul;
579 # endif
580
581 buf = mmap((void *)start, code_gen_buffer_size,
582 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
583 return buf == MAP_FAILED ? NULL : buf;
584 }
585 #else
586 static inline void *alloc_code_gen_buffer(void)
587 {
588 void *buf = g_malloc(code_gen_buffer_size);
589 if (buf) {
590 map_exec(buf, code_gen_buffer_size);
591 }
592 return buf;
593 }
594 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
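/* Summary of the three strategies above: CONFIG_USER_ONLY builds use a
   static, CODE_GEN_ALIGN-aligned array and simply map_exec() it; hosts on
   the USE_MMAP list create an anonymous RWX mapping, hinted to an
   arch-specific address (or forced into low memory with MAP_32BIT on
   non-PIE x86-64); everything else falls back to g_malloc() + map_exec(). */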
595
596 static inline void code_gen_alloc(size_t tb_size)
597 {
598 code_gen_buffer_size = size_code_gen_buffer(tb_size);
599 code_gen_buffer = alloc_code_gen_buffer();
600 if (code_gen_buffer == NULL) {
601 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
602 exit(1);
603 }
604
605 /* Steal room for the prologue at the end of the buffer. This ensures
606 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
607 from TB's to the prologue are going to be in range. It also means
608 that we don't need to mark (additional) portions of the data segment
609 as executable. */
610 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
611 code_gen_buffer_size -= 1024;
612
613 code_gen_buffer_max_size = code_gen_buffer_size -
614 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
615 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
616 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
617 }
618
619 /* Must be called before using the QEMU cpus. 'tb_size' is the size
620 (in bytes) allocated to the translation buffer. Zero means default
621 size. */
622 void tcg_exec_init(unsigned long tb_size)
623 {
624 cpu_gen_init();
625 code_gen_alloc(tb_size);
626 code_gen_ptr = code_gen_buffer;
627 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
628 page_init();
629 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
630 /* There's no guest base to take into account, so go ahead and
631 initialize the prologue now. */
632 tcg_prologue_init(&tcg_ctx);
633 #endif
634 }
635
636 bool tcg_enabled(void)
637 {
638 return code_gen_buffer != NULL;
639 }
640
641 void cpu_exec_init_all(void)
642 {
643 #if !defined(CONFIG_USER_ONLY)
644 memory_map_init();
645 io_mem_init();
646 #endif
647 }
648
649 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
650
651 static int cpu_common_post_load(void *opaque, int version_id)
652 {
653 CPUArchState *env = opaque;
654
655 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
656 version_id is increased. */
657 env->interrupt_request &= ~0x01;
658 tlb_flush(env, 1);
659
660 return 0;
661 }
662
663 static const VMStateDescription vmstate_cpu_common = {
664 .name = "cpu_common",
665 .version_id = 1,
666 .minimum_version_id = 1,
667 .minimum_version_id_old = 1,
668 .post_load = cpu_common_post_load,
669 .fields = (VMStateField []) {
670 VMSTATE_UINT32(halted, CPUArchState),
671 VMSTATE_UINT32(interrupt_request, CPUArchState),
672 VMSTATE_END_OF_LIST()
673 }
674 };
675 #endif
676
677 CPUArchState *qemu_get_cpu(int cpu)
678 {
679 CPUArchState *env = first_cpu;
680
681 while (env) {
682 if (env->cpu_index == cpu)
683 break;
684 env = env->next_cpu;
685 }
686
687 return env;
688 }
689
690 void cpu_exec_init(CPUArchState *env)
691 {
692 #ifndef CONFIG_USER_ONLY
693 CPUState *cpu = ENV_GET_CPU(env);
694 #endif
695 CPUArchState **penv;
696 int cpu_index;
697
698 #if defined(CONFIG_USER_ONLY)
699 cpu_list_lock();
700 #endif
701 env->next_cpu = NULL;
702 penv = &first_cpu;
703 cpu_index = 0;
704 while (*penv != NULL) {
705 penv = &(*penv)->next_cpu;
706 cpu_index++;
707 }
708 env->cpu_index = cpu_index;
709 env->numa_node = 0;
710 QTAILQ_INIT(&env->breakpoints);
711 QTAILQ_INIT(&env->watchpoints);
712 #ifndef CONFIG_USER_ONLY
713 cpu->thread_id = qemu_get_thread_id();
714 #endif
715 *penv = env;
716 #if defined(CONFIG_USER_ONLY)
717 cpu_list_unlock();
718 #endif
719 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
720 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
721 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
722 cpu_save, cpu_load, env);
723 #endif
724 }
725
726 /* Allocate a new translation block. Flush the translation buffer if
727 too many translation blocks or too much generated code. */
728 static TranslationBlock *tb_alloc(target_ulong pc)
729 {
730 TranslationBlock *tb;
731
732 if (nb_tbs >= code_gen_max_blocks ||
733 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
734 return NULL;
735 tb = &tbs[nb_tbs++];
736 tb->pc = pc;
737 tb->cflags = 0;
738 return tb;
739 }
740
741 void tb_free(TranslationBlock *tb)
742 {
743 /* In practice this is mostly used for single-use temporary TBs.
744 Ignore the hard cases and just back up if this TB happens to
745 be the last one generated. */
746 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
747 code_gen_ptr = tb->tc_ptr;
748 nb_tbs--;
749 }
750 }
751
752 static inline void invalidate_page_bitmap(PageDesc *p)
753 {
754 if (p->code_bitmap) {
755 g_free(p->code_bitmap);
756 p->code_bitmap = NULL;
757 }
758 p->code_write_count = 0;
759 }
760
761 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
762
763 static void page_flush_tb_1 (int level, void **lp)
764 {
765 int i;
766
767 if (*lp == NULL) {
768 return;
769 }
770 if (level == 0) {
771 PageDesc *pd = *lp;
772 for (i = 0; i < L2_SIZE; ++i) {
773 pd[i].first_tb = NULL;
774 invalidate_page_bitmap(pd + i);
775 }
776 } else {
777 void **pp = *lp;
778 for (i = 0; i < L2_SIZE; ++i) {
779 page_flush_tb_1 (level - 1, pp + i);
780 }
781 }
782 }
783
784 static void page_flush_tb(void)
785 {
786 int i;
787 for (i = 0; i < V_L1_SIZE; i++) {
788 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
789 }
790 }
791
792 /* flush all the translation blocks */
793 /* XXX: tb_flush is currently not thread safe */
794 void tb_flush(CPUArchState *env1)
795 {
796 CPUArchState *env;
797 #if defined(DEBUG_FLUSH)
798 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
799 (unsigned long)(code_gen_ptr - code_gen_buffer),
800 nb_tbs, nb_tbs > 0 ?
801 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
802 #endif
803 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
804 cpu_abort(env1, "Internal error: code buffer overflow\n");
805
806 nb_tbs = 0;
807
808 for(env = first_cpu; env != NULL; env = env->next_cpu) {
809 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
810 }
811
812 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
813 page_flush_tb();
814
815 code_gen_ptr = code_gen_buffer;
816 /* XXX: flush processor icache at this point if cache flush is
817 expensive */
818 tb_flush_count++;
819 }
820
821 #ifdef DEBUG_TB_CHECK
822
823 static void tb_invalidate_check(target_ulong address)
824 {
825 TranslationBlock *tb;
826 int i;
827 address &= TARGET_PAGE_MASK;
828 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
829 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
830 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
831 address >= tb->pc + tb->size)) {
832 printf("ERROR invalidate: address=" TARGET_FMT_lx
833 " PC=%08lx size=%04x\n",
834 address, (long)tb->pc, tb->size);
835 }
836 }
837 }
838 }
839
840 /* verify that all the pages have correct rights for code */
841 static void tb_page_check(void)
842 {
843 TranslationBlock *tb;
844 int i, flags1, flags2;
845
846 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
847 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
848 flags1 = page_get_flags(tb->pc);
849 flags2 = page_get_flags(tb->pc + tb->size - 1);
850 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
851 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
852 (long)tb->pc, tb->size, flags1, flags2);
853 }
854 }
855 }
856 }
857
858 #endif
859
860 /* invalidate one TB */
861 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
862 int next_offset)
863 {
864 TranslationBlock *tb1;
865 for(;;) {
866 tb1 = *ptb;
867 if (tb1 == tb) {
868 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
869 break;
870 }
871 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
872 }
873 }
874
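/* Note on the pointer tagging used by the list code below: TranslationBlock
   pointers stored in the per-page lists (PageDesc.first_tb, tb->page_next[])
   and in the jump lists (tb->jmp_first, tb->jmp_next[]) carry an index in
   their two low bits.  For page lists it selects which of the TB's (at most
   two) pages the link belongs to; for jump lists it selects which outgoing
   jump slot of the pointing TB is used, and the value 2 marks the owning TB
   itself, i.e. the end of the circular list. */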
875 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
876 {
877 TranslationBlock *tb1;
878 unsigned int n1;
879
880 for(;;) {
881 tb1 = *ptb;
882 n1 = (uintptr_t)tb1 & 3;
883 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
884 if (tb1 == tb) {
885 *ptb = tb1->page_next[n1];
886 break;
887 }
888 ptb = &tb1->page_next[n1];
889 }
890 }
891
892 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
893 {
894 TranslationBlock *tb1, **ptb;
895 unsigned int n1;
896
897 ptb = &tb->jmp_next[n];
898 tb1 = *ptb;
899 if (tb1) {
900 /* find tb(n) in circular list */
901 for(;;) {
902 tb1 = *ptb;
903 n1 = (uintptr_t)tb1 & 3;
904 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
905 if (n1 == n && tb1 == tb)
906 break;
907 if (n1 == 2) {
908 ptb = &tb1->jmp_first;
909 } else {
910 ptb = &tb1->jmp_next[n1];
911 }
912 }
913 /* now we can suppress tb(n) from the list */
914 *ptb = tb->jmp_next[n];
915
916 tb->jmp_next[n] = NULL;
917 }
918 }
919
920 /* reset the jump entry 'n' of a TB so that it is not chained to
921 another TB */
922 static inline void tb_reset_jump(TranslationBlock *tb, int n)
923 {
924 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
925 }
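/* Note: tc_ptr + tb_next_offset[n] is the address, recorded at code
   generation time, of the code that immediately follows jump 'n' inside this
   TB's own buffer, so after the reset the jump no longer chains to another
   TB and execution falls back to the main execution loop. */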
926
927 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
928 {
929 CPUArchState *env;
930 PageDesc *p;
931 unsigned int h, n1;
932 tb_page_addr_t phys_pc;
933 TranslationBlock *tb1, *tb2;
934
935 /* remove the TB from the hash list */
936 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
937 h = tb_phys_hash_func(phys_pc);
938 tb_remove(&tb_phys_hash[h], tb,
939 offsetof(TranslationBlock, phys_hash_next));
940
941 /* remove the TB from the page list */
942 if (tb->page_addr[0] != page_addr) {
943 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
944 tb_page_remove(&p->first_tb, tb);
945 invalidate_page_bitmap(p);
946 }
947 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
948 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
949 tb_page_remove(&p->first_tb, tb);
950 invalidate_page_bitmap(p);
951 }
952
953 tb_invalidated_flag = 1;
954
955 /* remove the TB from the hash list */
956 h = tb_jmp_cache_hash_func(tb->pc);
957 for(env = first_cpu; env != NULL; env = env->next_cpu) {
958 if (env->tb_jmp_cache[h] == tb)
959 env->tb_jmp_cache[h] = NULL;
960 }
961
962 /* suppress this TB from the two jump lists */
963 tb_jmp_remove(tb, 0);
964 tb_jmp_remove(tb, 1);
965
966 /* suppress any remaining jumps to this TB */
967 tb1 = tb->jmp_first;
968 for(;;) {
969 n1 = (uintptr_t)tb1 & 3;
970 if (n1 == 2)
971 break;
972 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
973 tb2 = tb1->jmp_next[n1];
974 tb_reset_jump(tb1, n1);
975 tb1->jmp_next[n1] = NULL;
976 tb1 = tb2;
977 }
978 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
979
980 tb_phys_invalidate_count++;
981 }
982
983 static inline void set_bits(uint8_t *tab, int start, int len)
984 {
985 int end, mask, end1;
986
987 end = start + len;
988 tab += start >> 3;
989 mask = 0xff << (start & 7);
990 if ((start & ~7) == (end & ~7)) {
991 if (start < end) {
992 mask &= ~(0xff << (end & 7));
993 *tab |= mask;
994 }
995 } else {
996 *tab++ |= mask;
997 start = (start + 8) & ~7;
998 end1 = end & ~7;
999 while (start < end1) {
1000 *tab++ = 0xff;
1001 start += 8;
1002 }
1003 if (start < end) {
1004 mask = ~(0xff << (end & 7));
1005 *tab |= mask;
1006 }
1007 }
1008 }
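/* Worked example for set_bits() above: set_bits(tab, 3, 7) marks bits 3..9,
   ORing 0xf8 into tab[0] (bits 3-7) and 0x03 into tab[1] (bits 8-9).
   build_page_bitmap() below uses it to record which bytes of a target page
   are covered by translated code. */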
1009
1010 static void build_page_bitmap(PageDesc *p)
1011 {
1012 int n, tb_start, tb_end;
1013 TranslationBlock *tb;
1014
1015 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1016
1017 tb = p->first_tb;
1018 while (tb != NULL) {
1019 n = (uintptr_t)tb & 3;
1020 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1021 /* NOTE: this is subtle as a TB may span two physical pages */
1022 if (n == 0) {
1023 /* NOTE: tb_end may be after the end of the page, but
1024 it is not a problem */
1025 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1026 tb_end = tb_start + tb->size;
1027 if (tb_end > TARGET_PAGE_SIZE)
1028 tb_end = TARGET_PAGE_SIZE;
1029 } else {
1030 tb_start = 0;
1031 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1032 }
1033 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1034 tb = tb->page_next[n];
1035 }
1036 }
1037
1038 TranslationBlock *tb_gen_code(CPUArchState *env,
1039 target_ulong pc, target_ulong cs_base,
1040 int flags, int cflags)
1041 {
1042 TranslationBlock *tb;
1043 uint8_t *tc_ptr;
1044 tb_page_addr_t phys_pc, phys_page2;
1045 target_ulong virt_page2;
1046 int code_gen_size;
1047
1048 phys_pc = get_page_addr_code(env, pc);
1049 tb = tb_alloc(pc);
1050 if (!tb) {
1051 /* flush must be done */
1052 tb_flush(env);
1053 /* cannot fail at this point */
1054 tb = tb_alloc(pc);
1055 /* Don't forget to invalidate previous TB info. */
1056 tb_invalidated_flag = 1;
1057 }
1058 tc_ptr = code_gen_ptr;
1059 tb->tc_ptr = tc_ptr;
1060 tb->cs_base = cs_base;
1061 tb->flags = flags;
1062 tb->cflags = cflags;
1063 cpu_gen_code(env, tb, &code_gen_size);
1064 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1065 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1066
1067 /* check next page if needed */
1068 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1069 phys_page2 = -1;
1070 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1071 phys_page2 = get_page_addr_code(env, virt_page2);
1072 }
1073 tb_link_page(tb, phys_pc, phys_page2);
1074 return tb;
1075 }
1076
1077 /*
1078 * Invalidate all TBs which intersect with the target physical address range
1079 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1080 * 'is_cpu_write_access' should be true if called from a real cpu write
1081 * access: the virtual CPU will exit the current TB if code is modified inside
1082 * this TB.
1083 */
1084 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1085 int is_cpu_write_access)
1086 {
1087 while (start < end) {
1088 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1089 start &= TARGET_PAGE_MASK;
1090 start += TARGET_PAGE_SIZE;
1091 }
1092 }
1093
1094 /*
1095 * Invalidate all TBs which intersect with the target physical address range
1096 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1097 * 'is_cpu_write_access' should be true if called from a real cpu write
1098 * access: the virtual CPU will exit the current TB if code is modified inside
1099 * this TB.
1100 */
1101 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1102 int is_cpu_write_access)
1103 {
1104 TranslationBlock *tb, *tb_next, *saved_tb;
1105 CPUArchState *env = cpu_single_env;
1106 tb_page_addr_t tb_start, tb_end;
1107 PageDesc *p;
1108 int n;
1109 #ifdef TARGET_HAS_PRECISE_SMC
1110 int current_tb_not_found = is_cpu_write_access;
1111 TranslationBlock *current_tb = NULL;
1112 int current_tb_modified = 0;
1113 target_ulong current_pc = 0;
1114 target_ulong current_cs_base = 0;
1115 int current_flags = 0;
1116 #endif /* TARGET_HAS_PRECISE_SMC */
1117
1118 p = page_find(start >> TARGET_PAGE_BITS);
1119 if (!p)
1120 return;
1121 if (!p->code_bitmap &&
1122 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1123 is_cpu_write_access) {
1124 /* build code bitmap */
1125 build_page_bitmap(p);
1126 }
1127
1128 /* we remove all the TBs in the range [start, end[ */
1129 /* XXX: see if in some cases it could be faster to invalidate all the code */
1130 tb = p->first_tb;
1131 while (tb != NULL) {
1132 n = (uintptr_t)tb & 3;
1133 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1134 tb_next = tb->page_next[n];
1135 /* NOTE: this is subtle as a TB may span two physical pages */
1136 if (n == 0) {
1137 /* NOTE: tb_end may be after the end of the page, but
1138 it is not a problem */
1139 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1140 tb_end = tb_start + tb->size;
1141 } else {
1142 tb_start = tb->page_addr[1];
1143 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1144 }
1145 if (!(tb_end <= start || tb_start >= end)) {
1146 #ifdef TARGET_HAS_PRECISE_SMC
1147 if (current_tb_not_found) {
1148 current_tb_not_found = 0;
1149 current_tb = NULL;
1150 if (env->mem_io_pc) {
1151 /* now we have a real cpu fault */
1152 current_tb = tb_find_pc(env->mem_io_pc);
1153 }
1154 }
1155 if (current_tb == tb &&
1156 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1157 /* If we are modifying the current TB, we must stop
1158 its execution. We could be more precise by checking
1159 that the modification is after the current PC, but it
1160 would require a specialized function to partially
1161 restore the CPU state */
1162
1163 current_tb_modified = 1;
1164 cpu_restore_state(current_tb, env, env->mem_io_pc);
1165 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1166 &current_flags);
1167 }
1168 #endif /* TARGET_HAS_PRECISE_SMC */
1169 /* we need to do that to handle the case where a signal
1170 occurs while doing tb_phys_invalidate() */
1171 saved_tb = NULL;
1172 if (env) {
1173 saved_tb = env->current_tb;
1174 env->current_tb = NULL;
1175 }
1176 tb_phys_invalidate(tb, -1);
1177 if (env) {
1178 env->current_tb = saved_tb;
1179 if (env->interrupt_request && env->current_tb)
1180 cpu_interrupt(env, env->interrupt_request);
1181 }
1182 }
1183 tb = tb_next;
1184 }
1185 #if !defined(CONFIG_USER_ONLY)
1186 /* if no code remaining, no need to continue to use slow writes */
1187 if (!p->first_tb) {
1188 invalidate_page_bitmap(p);
1189 if (is_cpu_write_access) {
1190 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1191 }
1192 }
1193 #endif
1194 #ifdef TARGET_HAS_PRECISE_SMC
1195 if (current_tb_modified) {
1196 /* we generate a block containing just the instruction
1197 modifying the memory. It will ensure that it cannot modify
1198 itself */
1199 env->current_tb = NULL;
1200 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1201 cpu_resume_from_signal(env, NULL);
1202 }
1203 #endif
1204 }
1205
1206 /* len must be <= 8 and start must be a multiple of len */
1207 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1208 {
1209 PageDesc *p;
1210 int offset, b;
1211 #if 0
1212 if (1) {
1213 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1214 cpu_single_env->mem_io_vaddr, len,
1215 cpu_single_env->eip,
1216 cpu_single_env->eip +
1217 (intptr_t)cpu_single_env->segs[R_CS].base);
1218 }
1219 #endif
1220 p = page_find(start >> TARGET_PAGE_BITS);
1221 if (!p)
1222 return;
1223 if (p->code_bitmap) {
1224 offset = start & ~TARGET_PAGE_MASK;
1225 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1226 if (b & ((1 << len) - 1))
1227 goto do_invalidate;
1228 } else {
1229 do_invalidate:
1230 tb_invalidate_phys_page_range(start, start + len, 1);
1231 }
1232 }
1233
1234 #if !defined(CONFIG_SOFTMMU)
1235 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1236 uintptr_t pc, void *puc)
1237 {
1238 TranslationBlock *tb;
1239 PageDesc *p;
1240 int n;
1241 #ifdef TARGET_HAS_PRECISE_SMC
1242 TranslationBlock *current_tb = NULL;
1243 CPUArchState *env = cpu_single_env;
1244 int current_tb_modified = 0;
1245 target_ulong current_pc = 0;
1246 target_ulong current_cs_base = 0;
1247 int current_flags = 0;
1248 #endif
1249
1250 addr &= TARGET_PAGE_MASK;
1251 p = page_find(addr >> TARGET_PAGE_BITS);
1252 if (!p)
1253 return;
1254 tb = p->first_tb;
1255 #ifdef TARGET_HAS_PRECISE_SMC
1256 if (tb && pc != 0) {
1257 current_tb = tb_find_pc(pc);
1258 }
1259 #endif
1260 while (tb != NULL) {
1261 n = (uintptr_t)tb & 3;
1262 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1263 #ifdef TARGET_HAS_PRECISE_SMC
1264 if (current_tb == tb &&
1265 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1266 /* If we are modifying the current TB, we must stop
1267 its execution. We could be more precise by checking
1268 that the modification is after the current PC, but it
1269 would require a specialized function to partially
1270 restore the CPU state */
1271
1272 current_tb_modified = 1;
1273 cpu_restore_state(current_tb, env, pc);
1274 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1275 &current_flags);
1276 }
1277 #endif /* TARGET_HAS_PRECISE_SMC */
1278 tb_phys_invalidate(tb, addr);
1279 tb = tb->page_next[n];
1280 }
1281 p->first_tb = NULL;
1282 #ifdef TARGET_HAS_PRECISE_SMC
1283 if (current_tb_modified) {
1284 /* we generate a block containing just the instruction
1285 modifying the memory. It will ensure that it cannot modify
1286 itself */
1287 env->current_tb = NULL;
1288 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1289 cpu_resume_from_signal(env, puc);
1290 }
1291 #endif
1292 }
1293 #endif
1294
1295 /* add the tb in the target page and protect it if necessary */
1296 static inline void tb_alloc_page(TranslationBlock *tb,
1297 unsigned int n, tb_page_addr_t page_addr)
1298 {
1299 PageDesc *p;
1300 #ifndef CONFIG_USER_ONLY
1301 bool page_already_protected;
1302 #endif
1303
1304 tb->page_addr[n] = page_addr;
1305 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1306 tb->page_next[n] = p->first_tb;
1307 #ifndef CONFIG_USER_ONLY
1308 page_already_protected = p->first_tb != NULL;
1309 #endif
1310 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1311 invalidate_page_bitmap(p);
1312
1313 #if defined(TARGET_HAS_SMC) || 1
1314
1315 #if defined(CONFIG_USER_ONLY)
1316 if (p->flags & PAGE_WRITE) {
1317 target_ulong addr;
1318 PageDesc *p2;
1319 int prot;
1320
1321 /* force the host page as non writable (writes will have a
1322 page fault + mprotect overhead) */
1323 page_addr &= qemu_host_page_mask;
1324 prot = 0;
1325 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1326 addr += TARGET_PAGE_SIZE) {
1327
1328 p2 = page_find (addr >> TARGET_PAGE_BITS);
1329 if (!p2)
1330 continue;
1331 prot |= p2->flags;
1332 p2->flags &= ~PAGE_WRITE;
1333 }
1334 mprotect(g2h(page_addr), qemu_host_page_size,
1335 (prot & PAGE_BITS) & ~PAGE_WRITE);
1336 #ifdef DEBUG_TB_INVALIDATE
1337 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1338 page_addr);
1339 #endif
1340 }
1341 #else
1342 /* if some code is already present, then the pages are already
1343 protected. So we handle the case where only the first TB is
1344 allocated in a physical page */
1345 if (!page_already_protected) {
1346 tlb_protect_code(page_addr);
1347 }
1348 #endif
1349
1350 #endif /* TARGET_HAS_SMC */
1351 }
1352
1353 /* add a new TB and link it to the physical page tables. phys_page2 is
1354 (-1) to indicate that only one page contains the TB. */
1355 void tb_link_page(TranslationBlock *tb,
1356 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1357 {
1358 unsigned int h;
1359 TranslationBlock **ptb;
1360
1361 /* Grab the mmap lock to stop another thread invalidating this TB
1362 before we are done. */
1363 mmap_lock();
1364 /* add in the physical hash table */
1365 h = tb_phys_hash_func(phys_pc);
1366 ptb = &tb_phys_hash[h];
1367 tb->phys_hash_next = *ptb;
1368 *ptb = tb;
1369
1370 /* add in the page list */
1371 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1372 if (phys_page2 != -1)
1373 tb_alloc_page(tb, 1, phys_page2);
1374 else
1375 tb->page_addr[1] = -1;
1376
1377 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1378 tb->jmp_next[0] = NULL;
1379 tb->jmp_next[1] = NULL;
1380
1381 /* init original jump addresses */
1382 if (tb->tb_next_offset[0] != 0xffff)
1383 tb_reset_jump(tb, 0);
1384 if (tb->tb_next_offset[1] != 0xffff)
1385 tb_reset_jump(tb, 1);
1386
1387 #ifdef DEBUG_TB_CHECK
1388 tb_page_check();
1389 #endif
1390 mmap_unlock();
1391 }
1392
1393 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1394 tb[1].tc_ptr. Return NULL if not found */
1395 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1396 {
1397 int m_min, m_max, m;
1398 uintptr_t v;
1399 TranslationBlock *tb;
1400
1401 if (nb_tbs <= 0)
1402 return NULL;
1403 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1404 tc_ptr >= (uintptr_t)code_gen_ptr) {
1405 return NULL;
1406 }
1407 /* binary search (cf Knuth) */
1408 m_min = 0;
1409 m_max = nb_tbs - 1;
1410 while (m_min <= m_max) {
1411 m = (m_min + m_max) >> 1;
1412 tb = &tbs[m];
1413 v = (uintptr_t)tb->tc_ptr;
1414 if (v == tc_ptr)
1415 return tb;
1416 else if (tc_ptr < v) {
1417 m_max = m - 1;
1418 } else {
1419 m_min = m + 1;
1420 }
1421 }
1422 return &tbs[m_max];
1423 }
1424
1425 static void tb_reset_jump_recursive(TranslationBlock *tb);
1426
1427 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1428 {
1429 TranslationBlock *tb1, *tb_next, **ptb;
1430 unsigned int n1;
1431
1432 tb1 = tb->jmp_next[n];
1433 if (tb1 != NULL) {
1434 /* find head of list */
1435 for(;;) {
1436 n1 = (uintptr_t)tb1 & 3;
1437 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1438 if (n1 == 2)
1439 break;
1440 tb1 = tb1->jmp_next[n1];
1441 }
1442 /* we are now sure that tb jumps to tb1 */
1443 tb_next = tb1;
1444
1445 /* remove tb from the jmp_first list */
1446 ptb = &tb_next->jmp_first;
1447 for(;;) {
1448 tb1 = *ptb;
1449 n1 = (uintptr_t)tb1 & 3;
1450 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1451 if (n1 == n && tb1 == tb)
1452 break;
1453 ptb = &tb1->jmp_next[n1];
1454 }
1455 *ptb = tb->jmp_next[n];
1456 tb->jmp_next[n] = NULL;
1457
1458 /* suppress the jump to next tb in generated code */
1459 tb_reset_jump(tb, n);
1460
1461 /* suppress jumps in the tb we could have jumped to */
1462 tb_reset_jump_recursive(tb_next);
1463 }
1464 }
1465
1466 static void tb_reset_jump_recursive(TranslationBlock *tb)
1467 {
1468 tb_reset_jump_recursive2(tb, 0);
1469 tb_reset_jump_recursive2(tb, 1);
1470 }
1471
1472 #if defined(TARGET_HAS_ICE)
1473 #if defined(CONFIG_USER_ONLY)
1474 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1475 {
1476 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1477 }
1478 #else
1479 void tb_invalidate_phys_addr(hwaddr addr)
1480 {
1481 ram_addr_t ram_addr;
1482 MemoryRegionSection *section;
1483
1484 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1485 if (!(memory_region_is_ram(section->mr)
1486 || (section->mr->rom_device && section->mr->readable))) {
1487 return;
1488 }
1489 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1490 + memory_region_section_addr(section, addr);
1491 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1492 }
1493
1494 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1495 {
1496 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1497 (pc & ~TARGET_PAGE_MASK));
1498 }
1499 #endif
1500 #endif /* TARGET_HAS_ICE */
1501
1502 #if defined(CONFIG_USER_ONLY)
1503 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1504
1505 {
1506 }
1507
1508 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1509 int flags, CPUWatchpoint **watchpoint)
1510 {
1511 return -ENOSYS;
1512 }
1513 #else
1514 /* Add a watchpoint. */
1515 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1516 int flags, CPUWatchpoint **watchpoint)
1517 {
1518 target_ulong len_mask = ~(len - 1);
1519 CPUWatchpoint *wp;
1520
1521 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1522 if ((len & (len - 1)) || (addr & ~len_mask) ||
1523 len == 0 || len > TARGET_PAGE_SIZE) {
1524 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1525 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1526 return -EINVAL;
1527 }
1528 wp = g_malloc(sizeof(*wp));
1529
1530 wp->vaddr = addr;
1531 wp->len_mask = len_mask;
1532 wp->flags = flags;
1533
1534 /* keep all GDB-injected watchpoints in front */
1535 if (flags & BP_GDB)
1536 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1537 else
1538 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1539
1540 tlb_flush_page(env, addr);
1541
1542 if (watchpoint)
1543 *watchpoint = wp;
1544 return 0;
1545 }
1546
1547 /* Remove a specific watchpoint. */
1548 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1549 int flags)
1550 {
1551 target_ulong len_mask = ~(len - 1);
1552 CPUWatchpoint *wp;
1553
1554 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1555 if (addr == wp->vaddr && len_mask == wp->len_mask
1556 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1557 cpu_watchpoint_remove_by_ref(env, wp);
1558 return 0;
1559 }
1560 }
1561 return -ENOENT;
1562 }
1563
1564 /* Remove a specific watchpoint by reference. */
1565 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1566 {
1567 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1568
1569 tlb_flush_page(env, watchpoint->vaddr);
1570
1571 g_free(watchpoint);
1572 }
1573
1574 /* Remove all matching watchpoints. */
1575 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1576 {
1577 CPUWatchpoint *wp, *next;
1578
1579 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1580 if (wp->flags & mask)
1581 cpu_watchpoint_remove_by_ref(env, wp);
1582 }
1583 }
1584 #endif
1585
1586 /* Add a breakpoint. */
1587 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1588 CPUBreakpoint **breakpoint)
1589 {
1590 #if defined(TARGET_HAS_ICE)
1591 CPUBreakpoint *bp;
1592
1593 bp = g_malloc(sizeof(*bp));
1594
1595 bp->pc = pc;
1596 bp->flags = flags;
1597
1598 /* keep all GDB-injected breakpoints in front */
1599 if (flags & BP_GDB)
1600 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1601 else
1602 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1603
1604 breakpoint_invalidate(env, pc);
1605
1606 if (breakpoint)
1607 *breakpoint = bp;
1608 return 0;
1609 #else
1610 return -ENOSYS;
1611 #endif
1612 }
1613
1614 /* Remove a specific breakpoint. */
1615 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1616 {
1617 #if defined(TARGET_HAS_ICE)
1618 CPUBreakpoint *bp;
1619
1620 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1621 if (bp->pc == pc && bp->flags == flags) {
1622 cpu_breakpoint_remove_by_ref(env, bp);
1623 return 0;
1624 }
1625 }
1626 return -ENOENT;
1627 #else
1628 return -ENOSYS;
1629 #endif
1630 }
1631
1632 /* Remove a specific breakpoint by reference. */
1633 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1634 {
1635 #if defined(TARGET_HAS_ICE)
1636 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1637
1638 breakpoint_invalidate(env, breakpoint->pc);
1639
1640 g_free(breakpoint);
1641 #endif
1642 }
1643
1644 /* Remove all matching breakpoints. */
1645 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1646 {
1647 #if defined(TARGET_HAS_ICE)
1648 CPUBreakpoint *bp, *next;
1649
1650 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1651 if (bp->flags & mask)
1652 cpu_breakpoint_remove_by_ref(env, bp);
1653 }
1654 #endif
1655 }
1656
1657 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1658 CPU loop after each instruction */
1659 void cpu_single_step(CPUArchState *env, int enabled)
1660 {
1661 #if defined(TARGET_HAS_ICE)
1662 if (env->singlestep_enabled != enabled) {
1663 env->singlestep_enabled = enabled;
1664 if (kvm_enabled())
1665 kvm_update_guest_debug(env, 0);
1666 else {
1667 /* must flush all the translated code to avoid inconsistencies */
1668 /* XXX: only flush what is necessary */
1669 tb_flush(env);
1670 }
1671 }
1672 #endif
1673 }
1674
1675 static void cpu_unlink_tb(CPUArchState *env)
1676 {
1677 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1678 problem and hope the cpu will stop of its own accord. For userspace
1679 emulation this usually isn't as bad as it sounds, since signals are
1680 mostly used just to interrupt blocking syscalls. */
1681 TranslationBlock *tb;
1682 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1683
1684 spin_lock(&interrupt_lock);
1685 tb = env->current_tb;
1686 /* if the cpu is currently executing code, we must unlink it and
1687 all the potentially executing TBs */
1688 if (tb) {
1689 env->current_tb = NULL;
1690 tb_reset_jump_recursive(tb);
1691 }
1692 spin_unlock(&interrupt_lock);
1693 }
1694
1695 #ifndef CONFIG_USER_ONLY
1696 /* mask must never be zero, except for A20 change call */
1697 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1698 {
1699 CPUState *cpu = ENV_GET_CPU(env);
1700 int old_mask;
1701
1702 old_mask = env->interrupt_request;
1703 env->interrupt_request |= mask;
1704
1705 /*
1706 * If called from iothread context, wake the target cpu in
1707 * case it's halted.
1708 */
1709 if (!qemu_cpu_is_self(cpu)) {
1710 qemu_cpu_kick(cpu);
1711 return;
1712 }
1713
1714 if (use_icount) {
1715 env->icount_decr.u16.high = 0xffff;
1716 if (!can_do_io(env)
1717 && (mask & ~old_mask) != 0) {
1718 cpu_abort(env, "Raised interrupt while not in I/O function");
1719 }
1720 } else {
1721 cpu_unlink_tb(env);
1722 }
1723 }
1724
1725 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1726
1727 #else /* CONFIG_USER_ONLY */
1728
1729 void cpu_interrupt(CPUArchState *env, int mask)
1730 {
1731 env->interrupt_request |= mask;
1732 cpu_unlink_tb(env);
1733 }
1734 #endif /* CONFIG_USER_ONLY */
1735
1736 void cpu_reset_interrupt(CPUArchState *env, int mask)
1737 {
1738 env->interrupt_request &= ~mask;
1739 }
1740
1741 void cpu_exit(CPUArchState *env)
1742 {
1743 env->exit_request = 1;
1744 cpu_unlink_tb(env);
1745 }
1746
1747 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1748 {
1749 va_list ap;
1750 va_list ap2;
1751
1752 va_start(ap, fmt);
1753 va_copy(ap2, ap);
1754 fprintf(stderr, "qemu: fatal: ");
1755 vfprintf(stderr, fmt, ap);
1756 fprintf(stderr, "\n");
1757 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1758 if (qemu_log_enabled()) {
1759 qemu_log("qemu: fatal: ");
1760 qemu_log_vprintf(fmt, ap2);
1761 qemu_log("\n");
1762 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1763 qemu_log_flush();
1764 qemu_log_close();
1765 }
1766 va_end(ap2);
1767 va_end(ap);
1768 #if defined(CONFIG_USER_ONLY)
1769 {
1770 struct sigaction act;
1771 sigfillset(&act.sa_mask);
1772 act.sa_handler = SIG_DFL;
1773 sigaction(SIGABRT, &act, NULL);
1774 }
1775 #endif
1776 abort();
1777 }
1778
1779 CPUArchState *cpu_copy(CPUArchState *env)
1780 {
1781 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1782 CPUArchState *next_cpu = new_env->next_cpu;
1783 int cpu_index = new_env->cpu_index;
1784 #if defined(TARGET_HAS_ICE)
1785 CPUBreakpoint *bp;
1786 CPUWatchpoint *wp;
1787 #endif
1788
1789 memcpy(new_env, env, sizeof(CPUArchState));
1790
1791 /* Preserve chaining and index. */
1792 new_env->next_cpu = next_cpu;
1793 new_env->cpu_index = cpu_index;
1794
1795 /* Clone all break/watchpoints.
1796 Note: Once we support ptrace with hw-debug register access, make sure
1797 BP_CPU break/watchpoints are handled correctly on clone. */
1798 QTAILQ_INIT(&new_env->breakpoints);
1799 QTAILQ_INIT(&new_env->watchpoints);
1800 #if defined(TARGET_HAS_ICE)
1801 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1802 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1803 }
1804 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1805 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1806 wp->flags, NULL);
1807 }
1808 #endif
1809
1810 return new_env;
1811 }
1812
1813 #if !defined(CONFIG_USER_ONLY)
1814 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1815 {
1816 unsigned int i;
1817
1818 /* Discard jump cache entries for any tb which might potentially
1819 overlap the flushed page. */
1820 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1821 memset (&env->tb_jmp_cache[i], 0,
1822 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1823
1824 i = tb_jmp_cache_hash_page(addr);
1825 memset (&env->tb_jmp_cache[i], 0,
1826 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1827 }
1828
1829 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1830 uintptr_t length)
1831 {
1832 uintptr_t start1;
1833
1834 /* we modify the TLB cache so that the dirty bit will be set again
1835 when accessing the range */
1836 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1837 /* Check that we don't span multiple blocks - this breaks the
1838 address comparisons below. */
1839 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1840 != (end - 1) - start) {
1841 abort();
1842 }
1843 cpu_tlb_reset_dirty_all(start1, length);
1844
1845 }
1846
1847 /* Note: start and end must be within the same ram block. */
1848 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1849 int dirty_flags)
1850 {
1851 uintptr_t length;
1852
1853 start &= TARGET_PAGE_MASK;
1854 end = TARGET_PAGE_ALIGN(end);
1855
1856 length = end - start;
1857 if (length == 0)
1858 return;
1859 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1860
1861 if (tcg_enabled()) {
1862 tlb_reset_dirty_range_all(start, end, length);
1863 }
1864 }
1865
1866 int cpu_physical_memory_set_dirty_tracking(int enable)
1867 {
1868 int ret = 0;
1869 in_migration = enable;
1870 return ret;
1871 }
1872
1873 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1874 MemoryRegionSection *section,
1875 target_ulong vaddr,
1876 hwaddr paddr,
1877 int prot,
1878 target_ulong *address)
1879 {
1880 hwaddr iotlb;
1881 CPUWatchpoint *wp;
1882
1883 if (memory_region_is_ram(section->mr)) {
1884 /* Normal RAM. */
1885 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1886 + memory_region_section_addr(section, paddr);
1887 if (!section->readonly) {
1888 iotlb |= phys_section_notdirty;
1889 } else {
1890 iotlb |= phys_section_rom;
1891 }
1892 } else {
1893 /* IO handlers are currently passed a physical address.
1894 It would be nice to pass an offset from the base address
1895 of that region. This would avoid having to special case RAM,
1896 and avoid full address decoding in every device.
1897 We can't use the high bits of pd for this because
1898 IO_MEM_ROMD uses these as a ram address. */
1899 iotlb = section - phys_sections;
1900 iotlb += memory_region_section_addr(section, paddr);
1901 }
1902
1903 /* Make accesses to pages with watchpoints go via the
1904 watchpoint trap routines. */
1905 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1906 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1907 /* Avoid trapping reads of pages with a write breakpoint. */
1908 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1909 iotlb = phys_section_watch + paddr;
1910 *address |= TLB_MMIO;
1911 break;
1912 }
1913 }
1914 }
1915
1916 return iotlb;
1917 }
1918
1919 #else
1920 /*
1921 * Walks guest process memory "regions" one by one
1922 * and calls callback function 'fn' for each region.
1923 */
1924
1925 struct walk_memory_regions_data
1926 {
1927 walk_memory_regions_fn fn;
1928 void *priv;
1929 uintptr_t start;
1930 int prot;
1931 };
1932
1933 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1934 abi_ulong end, int new_prot)
1935 {
1936 if (data->start != -1ul) {
1937 int rc = data->fn(data->priv, data->start, end, data->prot);
1938 if (rc != 0) {
1939 return rc;
1940 }
1941 }
1942
1943 data->start = (new_prot ? end : -1ul);
1944 data->prot = new_prot;
1945
1946 return 0;
1947 }
1948
1949 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1950 abi_ulong base, int level, void **lp)
1951 {
1952 abi_ulong pa;
1953 int i, rc;
1954
1955 if (*lp == NULL) {
1956 return walk_memory_regions_end(data, base, 0);
1957 }
1958
1959 if (level == 0) {
1960 PageDesc *pd = *lp;
1961 for (i = 0; i < L2_SIZE; ++i) {
1962 int prot = pd[i].flags;
1963
1964 pa = base | (i << TARGET_PAGE_BITS);
1965 if (prot != data->prot) {
1966 rc = walk_memory_regions_end(data, pa, prot);
1967 if (rc != 0) {
1968 return rc;
1969 }
1970 }
1971 }
1972 } else {
1973 void **pp = *lp;
1974 for (i = 0; i < L2_SIZE; ++i) {
1975 pa = base | ((abi_ulong)i <<
1976 (TARGET_PAGE_BITS + L2_BITS * level));
1977 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1978 if (rc != 0) {
1979 return rc;
1980 }
1981 }
1982 }
1983
1984 return 0;
1985 }
1986
1987 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1988 {
1989 struct walk_memory_regions_data data;
1990 uintptr_t i;
1991
1992 data.fn = fn;
1993 data.priv = priv;
1994 data.start = -1ul;
1995 data.prot = 0;
1996
1997 for (i = 0; i < V_L1_SIZE; i++) {
1998 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1999 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2000 if (rc != 0) {
2001 return rc;
2002 }
2003 }
2004
2005 return walk_memory_regions_end(&data, 0, 0);
2006 }
2007
2008 static int dump_region(void *priv, abi_ulong start,
2009 abi_ulong end, unsigned long prot)
2010 {
2011 FILE *f = (FILE *)priv;
2012
2013 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2014 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2015 start, end, end - start,
2016 ((prot & PAGE_READ) ? 'r' : '-'),
2017 ((prot & PAGE_WRITE) ? 'w' : '-'),
2018 ((prot & PAGE_EXEC) ? 'x' : '-'));
2019
2020 return (0);
2021 }
2022
2023 /* dump memory mappings */
2024 void page_dump(FILE *f)
2025 {
2026 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2027 "start", "end", "size", "prot");
2028 walk_memory_regions(f, dump_region);
2029 }
2030
2031 int page_get_flags(target_ulong address)
2032 {
2033 PageDesc *p;
2034
2035 p = page_find(address >> TARGET_PAGE_BITS);
2036 if (!p)
2037 return 0;
2038 return p->flags;
2039 }
2040
2041 /* Modify the flags of a page and invalidate the code if necessary.
2042    The flag PAGE_WRITE_ORG is set automatically depending
2043 on PAGE_WRITE. The mmap_lock should already be held. */
2044 void page_set_flags(target_ulong start, target_ulong end, int flags)
2045 {
2046 target_ulong addr, len;
2047
2048 /* This function should never be called with addresses outside the
2049 guest address space. If this assert fires, it probably indicates
2050 a missing call to h2g_valid. */
2051 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2052 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2053 #endif
2054 assert(start < end);
2055
2056 start = start & TARGET_PAGE_MASK;
2057 end = TARGET_PAGE_ALIGN(end);
2058
2059 if (flags & PAGE_WRITE) {
2060 flags |= PAGE_WRITE_ORG;
2061 }
2062
2063 for (addr = start, len = end - start;
2064 len != 0;
2065 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2066 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2067
2068 /* If the write protection bit is set, then we invalidate
2069 the code inside. */
2070 if (!(p->flags & PAGE_WRITE) &&
2071 (flags & PAGE_WRITE) &&
2072 p->first_tb) {
2073 tb_invalidate_phys_page(addr, 0, NULL);
2074 }
2075 p->flags = flags;
2076 }
2077 }
2078
2079 int page_check_range(target_ulong start, target_ulong len, int flags)
2080 {
2081 PageDesc *p;
2082 target_ulong end;
2083 target_ulong addr;
2084
2085 /* This function should never be called with addresses outside the
2086 guest address space. If this assert fires, it probably indicates
2087 a missing call to h2g_valid. */
2088 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2089 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2090 #endif
2091
2092 if (len == 0) {
2093 return 0;
2094 }
2095 if (start + len - 1 < start) {
2096 /* We've wrapped around. */
2097 return -1;
2098 }
2099
2100     end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2101 start = start & TARGET_PAGE_MASK;
2102
2103 for (addr = start, len = end - start;
2104 len != 0;
2105 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2106 p = page_find(addr >> TARGET_PAGE_BITS);
2107         if (!p)
2108             return -1;
2109         if (!(p->flags & PAGE_VALID))
2110             return -1;
2111
2112 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2113 return -1;
2114 if (flags & PAGE_WRITE) {
2115 if (!(p->flags & PAGE_WRITE_ORG))
2116 return -1;
2117 /* unprotect the page if it was put read-only because it
2118 contains translated code */
2119 if (!(p->flags & PAGE_WRITE)) {
2120 if (!page_unprotect(addr, 0, NULL))
2121 return -1;
2122 }
2123 return 0;
2124 }
2125 }
2126 return 0;
2127 }
2128
2129 /* called from signal handler: invalidate the code and unprotect the
2130 page. Return TRUE if the fault was successfully handled. */
2131 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2132 {
2133 unsigned int prot;
2134 PageDesc *p;
2135 target_ulong host_start, host_end, addr;
2136
2137     /* Technically this isn't safe inside a signal handler.  However, we
2138 know this only ever happens in a synchronous SEGV handler, so in
2139 practice it seems to be ok. */
2140 mmap_lock();
2141
2142 p = page_find(address >> TARGET_PAGE_BITS);
2143 if (!p) {
2144 mmap_unlock();
2145 return 0;
2146 }
2147
2148 /* if the page was really writable, then we change its
2149 protection back to writable */
2150 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2151 host_start = address & qemu_host_page_mask;
2152 host_end = host_start + qemu_host_page_size;
2153
2154 prot = 0;
2155 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2156 p = page_find(addr >> TARGET_PAGE_BITS);
2157 p->flags |= PAGE_WRITE;
2158 prot |= p->flags;
2159
2160 /* and since the content will be modified, we must invalidate
2161 the corresponding translated code. */
2162 tb_invalidate_phys_page(addr, pc, puc);
2163 #ifdef DEBUG_TB_CHECK
2164 tb_invalidate_check(addr);
2165 #endif
2166 }
2167 mprotect((void *)g2h(host_start), qemu_host_page_size,
2168 prot & PAGE_BITS);
2169
2170 mmap_unlock();
2171 return 1;
2172 }
2173 mmap_unlock();
2174 return 0;
2175 }
2176 #endif /* defined(CONFIG_USER_ONLY) */
2177
2178 #if !defined(CONFIG_USER_ONLY)
2179
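/* Subpages handle memory sections that do not start or end on a
   TARGET_PAGE_SIZE boundary: a page-sized container region forwards each
   byte offset to the section index registered for it in sub_section[]. */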
2180 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2181 typedef struct subpage_t {
2182 MemoryRegion iomem;
2183 hwaddr base;
2184 uint16_t sub_section[TARGET_PAGE_SIZE];
2185 } subpage_t;
2186
2187 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2188 uint16_t section);
2189 static subpage_t *subpage_init(hwaddr base);
2190 static void destroy_page_desc(uint16_t section_index)
2191 {
2192 MemoryRegionSection *section = &phys_sections[section_index];
2193 MemoryRegion *mr = section->mr;
2194
2195 if (mr->subpage) {
2196 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2197 memory_region_destroy(&subpage->iomem);
2198 g_free(subpage);
2199 }
2200 }
2201
2202 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2203 {
2204 unsigned i;
2205 PhysPageEntry *p;
2206
2207 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2208 return;
2209 }
2210
2211 p = phys_map_nodes[lp->ptr];
2212 for (i = 0; i < L2_SIZE; ++i) {
2213 if (!p[i].is_leaf) {
2214 destroy_l2_mapping(&p[i], level - 1);
2215 } else {
2216 destroy_page_desc(p[i].ptr);
2217 }
2218 }
2219 lp->is_leaf = 0;
2220 lp->ptr = PHYS_MAP_NODE_NIL;
2221 }
2222
2223 static void destroy_all_mappings(AddressSpaceDispatch *d)
2224 {
2225 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2226 phys_map_nodes_reset();
2227 }
2228
2229 static uint16_t phys_section_add(MemoryRegionSection *section)
2230 {
2231 if (phys_sections_nb == phys_sections_nb_alloc) {
2232 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2233 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2234 phys_sections_nb_alloc);
2235 }
2236 phys_sections[phys_sections_nb] = *section;
2237 return phys_sections_nb++;
2238 }
2239
2240 static void phys_sections_clear(void)
2241 {
2242 phys_sections_nb = 0;
2243 }
2244
2245 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2246 {
2247 subpage_t *subpage;
2248 hwaddr base = section->offset_within_address_space
2249 & TARGET_PAGE_MASK;
2250 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2251 MemoryRegionSection subsection = {
2252 .offset_within_address_space = base,
2253 .size = TARGET_PAGE_SIZE,
2254 };
2255 hwaddr start, end;
2256
2257 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2258
2259 if (!(existing->mr->subpage)) {
2260 subpage = subpage_init(base);
2261 subsection.mr = &subpage->iomem;
2262 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2263 phys_section_add(&subsection));
2264 } else {
2265 subpage = container_of(existing->mr, subpage_t, iomem);
2266 }
2267 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2268 end = start + section->size - 1;
2269 subpage_register(subpage, start, end, phys_section_add(section));
2270 }
2271
2272
2273 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2274 {
2275 hwaddr start_addr = section->offset_within_address_space;
2276 ram_addr_t size = section->size;
2277 hwaddr addr;
2278 uint16_t section_index = phys_section_add(section);
2279
2280 assert(size);
2281
2282 addr = start_addr;
2283 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2284 section_index);
2285 }
2286
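/* Called for every MemoryRegionSection when an address space is rebuilt:
   an unaligned head and tail are registered as subpages, while the
   page-aligned middle is handed to register_multipage in one go (or, if
   the offset within the region itself is unaligned, page by page as
   subpages). */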
2287 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2288 {
2289 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2290 MemoryRegionSection now = *section, remain = *section;
2291
2292 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2293 || (now.size < TARGET_PAGE_SIZE)) {
2294 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2295 - now.offset_within_address_space,
2296 now.size);
2297 register_subpage(d, &now);
2298 remain.size -= now.size;
2299 remain.offset_within_address_space += now.size;
2300 remain.offset_within_region += now.size;
2301 }
2302 while (remain.size >= TARGET_PAGE_SIZE) {
2303 now = remain;
2304 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2305 now.size = TARGET_PAGE_SIZE;
2306 register_subpage(d, &now);
2307 } else {
2308 now.size &= TARGET_PAGE_MASK;
2309 register_multipage(d, &now);
2310 }
2311 remain.size -= now.size;
2312 remain.offset_within_address_space += now.size;
2313 remain.offset_within_region += now.size;
2314 }
2315 now = remain;
2316 if (now.size) {
2317 register_subpage(d, &now);
2318 }
2319 }
2320
2321 void qemu_flush_coalesced_mmio_buffer(void)
2322 {
2323 if (kvm_enabled())
2324 kvm_flush_coalesced_mmio_buffer();
2325 }
2326
2327 #if defined(__linux__) && !defined(TARGET_S390X)
2328
2329 #include <sys/vfs.h>
2330
2331 #define HUGETLBFS_MAGIC 0x958458f6
2332
2333 static long gethugepagesize(const char *path)
2334 {
2335 struct statfs fs;
2336 int ret;
2337
2338 do {
2339 ret = statfs(path, &fs);
2340 } while (ret != 0 && errno == EINTR);
2341
2342 if (ret != 0) {
2343 perror(path);
2344 return 0;
2345 }
2346
2347 if (fs.f_type != HUGETLBFS_MAGIC)
2348 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2349
2350 return fs.f_bsize;
2351 }
2352
2353 static void *file_ram_alloc(RAMBlock *block,
2354 ram_addr_t memory,
2355 const char *path)
2356 {
2357 char *filename;
2358 void *area;
2359 int fd;
2360 #ifdef MAP_POPULATE
2361 int flags;
2362 #endif
2363 unsigned long hpagesize;
2364
2365 hpagesize = gethugepagesize(path);
2366 if (!hpagesize) {
2367 return NULL;
2368 }
2369
2370 if (memory < hpagesize) {
2371 return NULL;
2372 }
2373
2374 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2375 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2376 return NULL;
2377 }
2378
2379 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2380 return NULL;
2381 }
2382
2383 fd = mkstemp(filename);
2384 if (fd < 0) {
2385 perror("unable to create backing store for hugepages");
2386 free(filename);
2387 return NULL;
2388 }
2389 unlink(filename);
2390 free(filename);
2391
2392 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2393
2394 /*
2395 * ftruncate is not supported by hugetlbfs in older
2396 * hosts, so don't bother bailing out on errors.
2397 * If anything goes wrong with it under other filesystems,
2398 * mmap will fail.
2399 */
2400 if (ftruncate(fd, memory))
2401 perror("ftruncate");
2402
2403 #ifdef MAP_POPULATE
2404     /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2405 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2406 * to sidestep this quirk.
2407 */
2408 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2409 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2410 #else
2411 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2412 #endif
2413 if (area == MAP_FAILED) {
2414 perror("file_ram_alloc: can't mmap RAM pages");
2415 close(fd);
2416 return (NULL);
2417 }
2418 block->fd = fd;
2419 return area;
2420 }
2421 #endif
2422
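/* Find a best-fit gap between the existing RAM blocks for a new block of
   the requested size: scan every block's end, locate the nearest block
   that starts after it, and keep the smallest gap that still fits.  An
   empty block list simply yields offset 0. */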
2423 static ram_addr_t find_ram_offset(ram_addr_t size)
2424 {
2425 RAMBlock *block, *next_block;
2426 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2427
2428 if (QLIST_EMPTY(&ram_list.blocks))
2429 return 0;
2430
2431 QLIST_FOREACH(block, &ram_list.blocks, next) {
2432 ram_addr_t end, next = RAM_ADDR_MAX;
2433
2434 end = block->offset + block->length;
2435
2436 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2437 if (next_block->offset >= end) {
2438 next = MIN(next, next_block->offset);
2439 }
2440 }
2441 if (next - end >= size && next - end < mingap) {
2442 offset = end;
2443 mingap = next - end;
2444 }
2445 }
2446
2447 if (offset == RAM_ADDR_MAX) {
2448 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2449 (uint64_t)size);
2450 abort();
2451 }
2452
2453 return offset;
2454 }
2455
2456 ram_addr_t last_ram_offset(void)
2457 {
2458 RAMBlock *block;
2459 ram_addr_t last = 0;
2460
2461 QLIST_FOREACH(block, &ram_list.blocks, next)
2462 last = MAX(last, block->offset + block->length);
2463
2464 return last;
2465 }
2466
2467 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2468 {
2469 int ret;
2470 QemuOpts *machine_opts;
2471
2472     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
2473 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2474 if (machine_opts &&
2475 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2476 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2477 if (ret) {
2478 perror("qemu_madvise");
2479 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2480 "but dump_guest_core=off specified\n");
2481 }
2482 }
2483 }
2484
2485 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2486 {
2487 RAMBlock *new_block, *block;
2488
2489 new_block = NULL;
2490 QLIST_FOREACH(block, &ram_list.blocks, next) {
2491 if (block->offset == addr) {
2492 new_block = block;
2493 break;
2494 }
2495 }
2496 assert(new_block);
2497 assert(!new_block->idstr[0]);
2498
2499 if (dev) {
2500 char *id = qdev_get_dev_path(dev);
2501 if (id) {
2502 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2503 g_free(id);
2504 }
2505 }
2506 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2507
2508 QLIST_FOREACH(block, &ram_list.blocks, next) {
2509 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2510 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2511 new_block->idstr);
2512 abort();
2513 }
2514 }
2515 }
2516
2517 static int memory_try_enable_merging(void *addr, size_t len)
2518 {
2519 QemuOpts *opts;
2520
2521 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2522 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2523 /* disabled by the user */
2524 return 0;
2525 }
2526
2527 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2528 }
2529
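/* Register a RAM block of 'size' bytes and return its offset in the
   ram_addr_t space.  If 'host' is non-NULL the caller supplies the backing
   memory (RAM_PREALLOC_MASK); otherwise it is allocated here via
   hugetlbfs (-mem-path), Xen, KVM or plain qemu_vmalloc().

   Illustrative call path (the device name is hypothetical): board code
   usually reaches this through
       memory_region_init_ram(&s->vram, "hypothetical-vram", vram_size);
   which allocates the backing block with qemu_ram_alloc(vram_size, mr). */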
2530 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2531 MemoryRegion *mr)
2532 {
2533 RAMBlock *new_block;
2534
2535 size = TARGET_PAGE_ALIGN(size);
2536 new_block = g_malloc0(sizeof(*new_block));
2537
2538 new_block->mr = mr;
2539 new_block->offset = find_ram_offset(size);
2540 if (host) {
2541 new_block->host = host;
2542 new_block->flags |= RAM_PREALLOC_MASK;
2543 } else {
2544 if (mem_path) {
2545 #if defined (__linux__) && !defined(TARGET_S390X)
2546 new_block->host = file_ram_alloc(new_block, size, mem_path);
2547 if (!new_block->host) {
2548 new_block->host = qemu_vmalloc(size);
2549 memory_try_enable_merging(new_block->host, size);
2550 }
2551 #else
2552 fprintf(stderr, "-mem-path option unsupported\n");
2553 exit(1);
2554 #endif
2555 } else {
2556 if (xen_enabled()) {
2557 xen_ram_alloc(new_block->offset, size, mr);
2558 } else if (kvm_enabled()) {
2559 /* some s390/kvm configurations have special constraints */
2560 new_block->host = kvm_vmalloc(size);
2561 } else {
2562 new_block->host = qemu_vmalloc(size);
2563 }
2564 memory_try_enable_merging(new_block->host, size);
2565 }
2566 }
2567 new_block->length = size;
2568
2569 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2570
2571 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2572 last_ram_offset() >> TARGET_PAGE_BITS);
2573 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2574 0, size >> TARGET_PAGE_BITS);
2575 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2576
2577 qemu_ram_setup_dump(new_block->host, size);
2578 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2579
2580 if (kvm_enabled())
2581 kvm_setup_guest_memory(new_block->host, size);
2582
2583 return new_block->offset;
2584 }
2585
2586 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2587 {
2588 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2589 }
2590
2591 void qemu_ram_free_from_ptr(ram_addr_t addr)
2592 {
2593 RAMBlock *block;
2594
2595 QLIST_FOREACH(block, &ram_list.blocks, next) {
2596 if (addr == block->offset) {
2597 QLIST_REMOVE(block, next);
2598 g_free(block);
2599 return;
2600 }
2601 }
2602 }
2603
2604 void qemu_ram_free(ram_addr_t addr)
2605 {
2606 RAMBlock *block;
2607
2608 QLIST_FOREACH(block, &ram_list.blocks, next) {
2609 if (addr == block->offset) {
2610 QLIST_REMOVE(block, next);
2611 if (block->flags & RAM_PREALLOC_MASK) {
2612 ;
2613 } else if (mem_path) {
2614 #if defined (__linux__) && !defined(TARGET_S390X)
2615 if (block->fd) {
2616 munmap(block->host, block->length);
2617 close(block->fd);
2618 } else {
2619 qemu_vfree(block->host);
2620 }
2621 #else
2622 abort();
2623 #endif
2624 } else {
2625 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2626 munmap(block->host, block->length);
2627 #else
2628 if (xen_enabled()) {
2629 xen_invalidate_map_cache_entry(block->host);
2630 } else {
2631 qemu_vfree(block->host);
2632 }
2633 #endif
2634 }
2635 g_free(block);
2636 return;
2637 }
2638 }
2639
2640 }
2641
2642 #ifndef _WIN32
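/* Throw away the current host mapping of [addr, addr + length) inside its
   RAM block and recreate it with the same kind of backing (hugetlbfs fd,
   anonymous private, or shared anonymous on s390x/KVM), then re-apply the
   merging and core-dump madvise settings. */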
2643 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2644 {
2645 RAMBlock *block;
2646 ram_addr_t offset;
2647 int flags;
2648 void *area, *vaddr;
2649
2650 QLIST_FOREACH(block, &ram_list.blocks, next) {
2651 offset = addr - block->offset;
2652 if (offset < block->length) {
2653 vaddr = block->host + offset;
2654 if (block->flags & RAM_PREALLOC_MASK) {
2655 ;
2656 } else {
2657 flags = MAP_FIXED;
2658 munmap(vaddr, length);
2659 if (mem_path) {
2660 #if defined(__linux__) && !defined(TARGET_S390X)
2661 if (block->fd) {
2662 #ifdef MAP_POPULATE
2663 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2664 MAP_PRIVATE;
2665 #else
2666 flags |= MAP_PRIVATE;
2667 #endif
2668 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2669 flags, block->fd, offset);
2670 } else {
2671 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2672 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2673 flags, -1, 0);
2674 }
2675 #else
2676 abort();
2677 #endif
2678 } else {
2679 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2680 flags |= MAP_SHARED | MAP_ANONYMOUS;
2681 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2682 flags, -1, 0);
2683 #else
2684 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2685 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2686 flags, -1, 0);
2687 #endif
2688 }
2689 if (area != vaddr) {
2690 fprintf(stderr, "Could not remap addr: "
2691 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2692 length, addr);
2693 exit(1);
2694 }
2695 memory_try_enable_merging(vaddr, length);
2696 qemu_ram_setup_dump(vaddr, length);
2697 }
2698 return;
2699 }
2700 }
2701 }
2702 #endif /* !_WIN32 */
2703
2704 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2705 With the exception of the softmmu code in this file, this should
2706 only be used for local memory (e.g. video ram) that the device owns,
2707 and knows it isn't going to access beyond the end of the block.
2708
2709 It should not be used for general purpose DMA.
2710 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2711 */
2712 void *qemu_get_ram_ptr(ram_addr_t addr)
2713 {
2714 RAMBlock *block;
2715
2716 QLIST_FOREACH(block, &ram_list.blocks, next) {
2717 if (addr - block->offset < block->length) {
2718             /* Move this entry to the start of the list.  */
2719 if (block != QLIST_FIRST(&ram_list.blocks)) {
2720 QLIST_REMOVE(block, next);
2721 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2722 }
2723 if (xen_enabled()) {
2724             /* We need to check if the requested address is in RAM,
2725              * because we don't want to map the entire guest memory in QEMU.
2726              * In that case, just map until the end of the page.
2727 */
2728 if (block->offset == 0) {
2729 return xen_map_cache(addr, 0, 0);
2730 } else if (block->host == NULL) {
2731 block->host =
2732 xen_map_cache(block->offset, block->length, 1);
2733 }
2734 }
2735 return block->host + (addr - block->offset);
2736 }
2737 }
2738
2739 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2740 abort();
2741
2742 return NULL;
2743 }
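/* A minimal sketch of the preferred pattern for general-purpose DMA,
   using the map/unmap API defined later in this file instead of
   qemu_get_ram_ptr() (guest address, length and direction are
   hypothetical, error handling omitted):

       hwaddr len = size;
       void *p = cpu_physical_memory_map(guest_addr, &len, is_write);
       if (p) {
           ... access at most 'len' bytes ...
           cpu_physical_memory_unmap(p, len, is_write, len);
       }
 */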
2744
2745 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2746  * Same as qemu_get_ram_ptr, but avoids reordering the RAM blocks.
2747 */
2748 void *qemu_safe_ram_ptr(ram_addr_t addr)
2749 {
2750 RAMBlock *block;
2751
2752 QLIST_FOREACH(block, &ram_list.blocks, next) {
2753 if (addr - block->offset < block->length) {
2754 if (xen_enabled()) {
2755             /* We need to check if the requested address is in RAM,
2756              * because we don't want to map the entire guest memory in QEMU.
2757              * In that case, just map until the end of the page.
2758 */
2759 if (block->offset == 0) {
2760 return xen_map_cache(addr, 0, 0);
2761 } else if (block->host == NULL) {
2762 block->host =
2763 xen_map_cache(block->offset, block->length, 1);
2764 }
2765 }
2766 return block->host + (addr - block->offset);
2767 }
2768 }
2769
2770 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2771 abort();
2772
2773 return NULL;
2774 }
2775
2776 /* Return a host pointer to guest RAM. Similar to qemu_get_ram_ptr,
2777  * but takes a size argument. */
2778 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2779 {
2780 if (*size == 0) {
2781 return NULL;
2782 }
2783 if (xen_enabled()) {
2784 return xen_map_cache(addr, *size, 1);
2785 } else {
2786 RAMBlock *block;
2787
2788 QLIST_FOREACH(block, &ram_list.blocks, next) {
2789 if (addr - block->offset < block->length) {
2790 if (addr - block->offset + *size > block->length)
2791 *size = block->length - addr + block->offset;
2792 return block->host + (addr - block->offset);
2793 }
2794 }
2795
2796 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2797 abort();
2798 }
2799 }
2800
2801 void qemu_put_ram_ptr(void *addr)
2802 {
2803 trace_qemu_put_ram_ptr(addr);
2804 }
2805
2806 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2807 {
2808 RAMBlock *block;
2809 uint8_t *host = ptr;
2810
2811 if (xen_enabled()) {
2812 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2813 return 0;
2814 }
2815
2816 QLIST_FOREACH(block, &ram_list.blocks, next) {
2817         /* This case happens when the block is not mapped. */
2818 if (block->host == NULL) {
2819 continue;
2820 }
2821 if (host - block->host < block->length) {
2822 *ram_addr = block->offset + (host - block->host);
2823 return 0;
2824 }
2825 }
2826
2827 return -1;
2828 }
2829
2830 /* Some of the softmmu routines need to translate from a host pointer
2831 (typically a TLB entry) back to a ram offset. */
2832 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2833 {
2834 ram_addr_t ram_addr;
2835
2836 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2837 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2838 abort();
2839 }
2840 return ram_addr;
2841 }
2842
2843 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2844 unsigned size)
2845 {
2846 #ifdef DEBUG_UNASSIGNED
2847 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2848 #endif
2849 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2850 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2851 #endif
2852 return 0;
2853 }
2854
2855 static void unassigned_mem_write(void *opaque, hwaddr addr,
2856 uint64_t val, unsigned size)
2857 {
2858 #ifdef DEBUG_UNASSIGNED
2859 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2860 #endif
2861 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2862 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2863 #endif
2864 }
2865
2866 static const MemoryRegionOps unassigned_mem_ops = {
2867 .read = unassigned_mem_read,
2868 .write = unassigned_mem_write,
2869 .endianness = DEVICE_NATIVE_ENDIAN,
2870 };
2871
2872 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2873 unsigned size)
2874 {
2875 abort();
2876 }
2877
2878 static void error_mem_write(void *opaque, hwaddr addr,
2879 uint64_t value, unsigned size)
2880 {
2881 abort();
2882 }
2883
2884 static const MemoryRegionOps error_mem_ops = {
2885 .read = error_mem_read,
2886 .write = error_mem_write,
2887 .endianness = DEVICE_NATIVE_ENDIAN,
2888 };
2889
2890 static const MemoryRegionOps rom_mem_ops = {
2891 .read = error_mem_read,
2892 .write = unassigned_mem_write,
2893 .endianness = DEVICE_NATIVE_ENDIAN,
2894 };
2895
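/* The "notdirty" region is installed in the TLB for RAM pages whose dirty
   bits are not all set yet.  A write through it first invalidates any
   translated code on the page, then performs the store, updates the dirty
   flags, and once the page is fully dirty re-enables direct RAM writes by
   calling tlb_set_dirty(). */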
2896 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2897 uint64_t val, unsigned size)
2898 {
2899 int dirty_flags;
2900 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2901 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2902 #if !defined(CONFIG_USER_ONLY)
2903 tb_invalidate_phys_page_fast(ram_addr, size);
2904 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2905 #endif
2906 }
2907 switch (size) {
2908 case 1:
2909 stb_p(qemu_get_ram_ptr(ram_addr), val);
2910 break;
2911 case 2:
2912 stw_p(qemu_get_ram_ptr(ram_addr), val);
2913 break;
2914 case 4:
2915 stl_p(qemu_get_ram_ptr(ram_addr), val);
2916 break;
2917 default:
2918 abort();
2919 }
2920 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2921 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2922 /* we remove the notdirty callback only if the code has been
2923 flushed */
2924 if (dirty_flags == 0xff)
2925 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2926 }
2927
2928 static const MemoryRegionOps notdirty_mem_ops = {
2929 .read = error_mem_read,
2930 .write = notdirty_mem_write,
2931 .endianness = DEVICE_NATIVE_ENDIAN,
2932 };
2933
2934 /* Generate a debug exception if a watchpoint has been hit. */
2935 static void check_watchpoint(int offset, int len_mask, int flags)
2936 {
2937 CPUArchState *env = cpu_single_env;
2938 target_ulong pc, cs_base;
2939 TranslationBlock *tb;
2940 target_ulong vaddr;
2941 CPUWatchpoint *wp;
2942 int cpu_flags;
2943
2944 if (env->watchpoint_hit) {
2945 /* We re-entered the check after replacing the TB. Now raise
2946          * the debug interrupt so that it will trigger after the
2947 * current instruction. */
2948 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2949 return;
2950 }
2951 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2952 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2953 if ((vaddr == (wp->vaddr & len_mask) ||
2954 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2955 wp->flags |= BP_WATCHPOINT_HIT;
2956 if (!env->watchpoint_hit) {
2957 env->watchpoint_hit = wp;
2958 tb = tb_find_pc(env->mem_io_pc);
2959 if (!tb) {
2960 cpu_abort(env, "check_watchpoint: could not find TB for "
2961 "pc=%p", (void *)env->mem_io_pc);
2962 }
2963 cpu_restore_state(tb, env, env->mem_io_pc);
2964 tb_phys_invalidate(tb, -1);
2965 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2966 env->exception_index = EXCP_DEBUG;
2967 cpu_loop_exit(env);
2968 } else {
2969 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2970 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2971 cpu_resume_from_signal(env, NULL);
2972 }
2973 }
2974 } else {
2975 wp->flags &= ~BP_WATCHPOINT_HIT;
2976 }
2977 }
2978 }
2979
2980 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2981 so these check for a hit then pass through to the normal out-of-line
2982 phys routines. */
2983 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
2984 unsigned size)
2985 {
2986 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2987 switch (size) {
2988 case 1: return ldub_phys(addr);
2989 case 2: return lduw_phys(addr);
2990 case 4: return ldl_phys(addr);
2991 default: abort();
2992 }
2993 }
2994
2995 static void watch_mem_write(void *opaque, hwaddr addr,
2996 uint64_t val, unsigned size)
2997 {
2998 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2999 switch (size) {
3000 case 1:
3001 stb_phys(addr, val);
3002 break;
3003 case 2:
3004 stw_phys(addr, val);
3005 break;
3006 case 4:
3007 stl_phys(addr, val);
3008 break;
3009 default: abort();
3010 }
3011 }
3012
3013 static const MemoryRegionOps watch_mem_ops = {
3014 .read = watch_mem_read,
3015 .write = watch_mem_write,
3016 .endianness = DEVICE_NATIVE_ENDIAN,
3017 };
3018
3019 static uint64_t subpage_read(void *opaque, hwaddr addr,
3020 unsigned len)
3021 {
3022 subpage_t *mmio = opaque;
3023 unsigned int idx = SUBPAGE_IDX(addr);
3024 MemoryRegionSection *section;
3025 #if defined(DEBUG_SUBPAGE)
3026 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3027 mmio, len, addr, idx);
3028 #endif
3029
3030 section = &phys_sections[mmio->sub_section[idx]];
3031 addr += mmio->base;
3032 addr -= section->offset_within_address_space;
3033 addr += section->offset_within_region;
3034 return io_mem_read(section->mr, addr, len);
3035 }
3036
3037 static void subpage_write(void *opaque, hwaddr addr,
3038 uint64_t value, unsigned len)
3039 {
3040 subpage_t *mmio = opaque;
3041 unsigned int idx = SUBPAGE_IDX(addr);
3042 MemoryRegionSection *section;
3043 #if defined(DEBUG_SUBPAGE)
3044 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3045 " idx %d value %"PRIx64"\n",
3046 __func__, mmio, len, addr, idx, value);
3047 #endif
3048
3049 section = &phys_sections[mmio->sub_section[idx]];
3050 addr += mmio->base;
3051 addr -= section->offset_within_address_space;
3052 addr += section->offset_within_region;
3053 io_mem_write(section->mr, addr, value, len);
3054 }
3055
3056 static const MemoryRegionOps subpage_ops = {
3057 .read = subpage_read,
3058 .write = subpage_write,
3059 .endianness = DEVICE_NATIVE_ENDIAN,
3060 };
3061
3062 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3063 unsigned size)
3064 {
3065 ram_addr_t raddr = addr;
3066 void *ptr = qemu_get_ram_ptr(raddr);
3067 switch (size) {
3068 case 1: return ldub_p(ptr);
3069 case 2: return lduw_p(ptr);
3070 case 4: return ldl_p(ptr);
3071 default: abort();
3072 }
3073 }
3074
3075 static void subpage_ram_write(void *opaque, hwaddr addr,
3076 uint64_t value, unsigned size)
3077 {
3078 ram_addr_t raddr = addr;
3079 void *ptr = qemu_get_ram_ptr(raddr);
3080 switch (size) {
3081 case 1: return stb_p(ptr, value);
3082 case 2: return stw_p(ptr, value);
3083 case 4: return stl_p(ptr, value);
3084 default: abort();
3085 }
3086 }
3087
3088 static const MemoryRegionOps subpage_ram_ops = {
3089 .read = subpage_ram_read,
3090 .write = subpage_ram_write,
3091 .endianness = DEVICE_NATIVE_ENDIAN,
3092 };
3093
3094 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3095 uint16_t section)
3096 {
3097 int idx, eidx;
3098
3099 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3100 return -1;
3101 idx = SUBPAGE_IDX(start);
3102 eidx = SUBPAGE_IDX(end);
3103 #if defined(DEBUG_SUBPAGE)
3104     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", __func__,
3105            mmio, start, end, idx, eidx, section);
3106 #endif
3107 if (memory_region_is_ram(phys_sections[section].mr)) {
3108 MemoryRegionSection new_section = phys_sections[section];
3109 new_section.mr = &io_mem_subpage_ram;
3110 section = phys_section_add(&new_section);
3111 }
3112 for (; idx <= eidx; idx++) {
3113 mmio->sub_section[idx] = section;
3114 }
3115
3116 return 0;
3117 }
3118
3119 static subpage_t *subpage_init(hwaddr base)
3120 {
3121 subpage_t *mmio;
3122
3123 mmio = g_malloc0(sizeof(subpage_t));
3124
3125 mmio->base = base;
3126 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3127 "subpage", TARGET_PAGE_SIZE);
3128 mmio->iomem.subpage = true;
3129 #if defined(DEBUG_SUBPAGE)
3130     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
3131            mmio, base, TARGET_PAGE_SIZE);
3132 #endif
3133 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3134
3135 return mmio;
3136 }
3137
3138 static uint16_t dummy_section(MemoryRegion *mr)
3139 {
3140 MemoryRegionSection section = {
3141 .mr = mr,
3142 .offset_within_address_space = 0,
3143 .offset_within_region = 0,
3144 .size = UINT64_MAX,
3145 };
3146
3147 return phys_section_add(&section);
3148 }
3149
3150 MemoryRegion *iotlb_to_region(hwaddr index)
3151 {
3152 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3153 }
3154
3155 static void io_mem_init(void)
3156 {
3157 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3158 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3159 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3160 "unassigned", UINT64_MAX);
3161 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3162 "notdirty", UINT64_MAX);
3163 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3164 "subpage-ram", UINT64_MAX);
3165 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3166 "watch", UINT64_MAX);
3167 }
3168
3169 static void mem_begin(MemoryListener *listener)
3170 {
3171 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3172
3173 destroy_all_mappings(d);
3174 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3175 }
3176
3177 static void core_begin(MemoryListener *listener)
3178 {
3179 phys_sections_clear();
3180 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3181 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3182 phys_section_rom = dummy_section(&io_mem_rom);
3183 phys_section_watch = dummy_section(&io_mem_watch);
3184 }
3185
3186 static void tcg_commit(MemoryListener *listener)
3187 {
3188 CPUArchState *env;
3189
3190 /* since each CPU stores ram addresses in its TLB cache, we must
3191 reset the modified entries */
3192 /* XXX: slow ! */
3193     for (env = first_cpu; env != NULL; env = env->next_cpu) {
3194 tlb_flush(env, 1);
3195 }
3196 }
3197
3198 static void core_log_global_start(MemoryListener *listener)
3199 {
3200 cpu_physical_memory_set_dirty_tracking(1);
3201 }
3202
3203 static void core_log_global_stop(MemoryListener *listener)
3204 {
3205 cpu_physical_memory_set_dirty_tracking(0);
3206 }
3207
3208 static void io_region_add(MemoryListener *listener,
3209 MemoryRegionSection *section)
3210 {
3211 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3212
3213 mrio->mr = section->mr;
3214 mrio->offset = section->offset_within_region;
3215 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3216 section->offset_within_address_space, section->size);
3217 ioport_register(&mrio->iorange);
3218 }
3219
3220 static void io_region_del(MemoryListener *listener,
3221 MemoryRegionSection *section)
3222 {
3223 isa_unassign_ioport(section->offset_within_address_space, section->size);
3224 }
3225
3226 static MemoryListener core_memory_listener = {
3227 .begin = core_begin,
3228 .log_global_start = core_log_global_start,
3229 .log_global_stop = core_log_global_stop,
3230 .priority = 1,
3231 };
3232
3233 static MemoryListener io_memory_listener = {
3234 .region_add = io_region_add,
3235 .region_del = io_region_del,
3236 .priority = 0,
3237 };
3238
3239 static MemoryListener tcg_memory_listener = {
3240 .commit = tcg_commit,
3241 };
3242
3243 void address_space_init_dispatch(AddressSpace *as)
3244 {
3245 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3246
3247 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3248 d->listener = (MemoryListener) {
3249 .begin = mem_begin,
3250 .region_add = mem_add,
3251 .region_nop = mem_add,
3252 .priority = 0,
3253 };
3254 as->dispatch = d;
3255 memory_listener_register(&d->listener, as);
3256 }
3257
3258 void address_space_destroy_dispatch(AddressSpace *as)
3259 {
3260 AddressSpaceDispatch *d = as->dispatch;
3261
3262 memory_listener_unregister(&d->listener);
3263 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3264 g_free(d);
3265 as->dispatch = NULL;
3266 }
3267
3268 static void memory_map_init(void)
3269 {
3270 system_memory = g_malloc(sizeof(*system_memory));
3271 memory_region_init(system_memory, "system", INT64_MAX);
3272 address_space_init(&address_space_memory, system_memory);
3273 address_space_memory.name = "memory";
3274
3275 system_io = g_malloc(sizeof(*system_io));
3276 memory_region_init(system_io, "io", 65536);
3277 address_space_init(&address_space_io, system_io);
3278 address_space_io.name = "I/O";
3279
3280 memory_listener_register(&core_memory_listener, &address_space_memory);
3281 memory_listener_register(&io_memory_listener, &address_space_io);
3282 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3283 }
3284
3285 MemoryRegion *get_system_memory(void)
3286 {
3287 return system_memory;
3288 }
3289
3290 MemoryRegion *get_system_io(void)
3291 {
3292 return system_io;
3293 }
3294
3295 #endif /* !defined(CONFIG_USER_ONLY) */
3296
3297 /* physical memory access (slow version, mainly for debug) */
3298 #if defined(CONFIG_USER_ONLY)
3299 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3300 uint8_t *buf, int len, int is_write)
3301 {
3302 int l, flags;
3303 target_ulong page;
3304 void * p;
3305
3306 while (len > 0) {
3307 page = addr & TARGET_PAGE_MASK;
3308 l = (page + TARGET_PAGE_SIZE) - addr;
3309 if (l > len)
3310 l = len;
3311 flags = page_get_flags(page);
3312 if (!(flags & PAGE_VALID))
3313 return -1;
3314 if (is_write) {
3315 if (!(flags & PAGE_WRITE))
3316 return -1;
3317 /* XXX: this code should not depend on lock_user */
3318 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3319 return -1;
3320 memcpy(p, buf, l);
3321 unlock_user(p, addr, l);
3322 } else {
3323 if (!(flags & PAGE_READ))
3324 return -1;
3325 /* XXX: this code should not depend on lock_user */
3326 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3327 return -1;
3328 memcpy(buf, p, l);
3329 unlock_user(p, addr, 0);
3330 }
3331 len -= l;
3332 buf += l;
3333 addr += l;
3334 }
3335 return 0;
3336 }
3337
3338 #else
3339
3340 static void invalidate_and_set_dirty(hwaddr addr,
3341 hwaddr length)
3342 {
3343 if (!cpu_physical_memory_is_dirty(addr)) {
3344 /* invalidate code */
3345 tb_invalidate_phys_page_range(addr, addr + length, 0);
3346 /* set dirty bit */
3347 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3348 }
3349 xen_modified_memory(addr, length);
3350 }
3351
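/* Slow-path access of 'len' bytes at 'addr' in address space 'as'.  The
   range is split at TARGET_PAGE_SIZE boundaries; RAM sections are copied
   through the host pointer, anything else goes through io_mem_read() /
   io_mem_write() in 4-, 2- or 1-byte pieces depending on alignment and
   the remaining length. */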
3352 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3353 int len, bool is_write)
3354 {
3355 AddressSpaceDispatch *d = as->dispatch;
3356 int l;
3357 uint8_t *ptr;
3358 uint32_t val;
3359 hwaddr page;
3360 MemoryRegionSection *section;
3361
3362 while (len > 0) {
3363 page = addr & TARGET_PAGE_MASK;
3364 l = (page + TARGET_PAGE_SIZE) - addr;
3365 if (l > len)
3366 l = len;
3367 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3368
3369 if (is_write) {
3370 if (!memory_region_is_ram(section->mr)) {
3371 hwaddr addr1;
3372 addr1 = memory_region_section_addr(section, addr);
3373 /* XXX: could force cpu_single_env to NULL to avoid
3374 potential bugs */
3375 if (l >= 4 && ((addr1 & 3) == 0)) {
3376 /* 32 bit write access */
3377 val = ldl_p(buf);
3378 io_mem_write(section->mr, addr1, val, 4);
3379 l = 4;
3380 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3381 /* 16 bit write access */
3382 val = lduw_p(buf);
3383 io_mem_write(section->mr, addr1, val, 2);
3384 l = 2;
3385 } else {
3386 /* 8 bit write access */
3387 val = ldub_p(buf);
3388 io_mem_write(section->mr, addr1, val, 1);
3389 l = 1;
3390 }
3391 } else if (!section->readonly) {
3392 ram_addr_t addr1;
3393 addr1 = memory_region_get_ram_addr(section->mr)
3394 + memory_region_section_addr(section, addr);
3395 /* RAM case */
3396 ptr = qemu_get_ram_ptr(addr1);
3397 memcpy(ptr, buf, l);
3398 invalidate_and_set_dirty(addr1, l);
3399 qemu_put_ram_ptr(ptr);
3400 }
3401 } else {
3402 if (!(memory_region_is_ram(section->mr) ||
3403 memory_region_is_romd(section->mr))) {
3404 hwaddr addr1;
3405 /* I/O case */
3406 addr1 = memory_region_section_addr(section, addr);
3407 if (l >= 4 && ((addr1 & 3) == 0)) {
3408 /* 32 bit read access */
3409 val = io_mem_read(section->mr, addr1, 4);
3410 stl_p(buf, val);
3411 l = 4;
3412 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3413 /* 16 bit read access */
3414 val = io_mem_read(section->mr, addr1, 2);
3415 stw_p(buf, val);
3416 l = 2;
3417 } else {
3418 /* 8 bit read access */
3419 val = io_mem_read(section->mr, addr1, 1);
3420 stb_p(buf, val);
3421 l = 1;
3422 }
3423 } else {
3424 /* RAM case */
3425 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3426 + memory_region_section_addr(section,
3427 addr));
3428 memcpy(buf, ptr, l);
3429 qemu_put_ram_ptr(ptr);
3430 }
3431 }
3432 len -= l;
3433 buf += l;
3434 addr += l;
3435 }
3436 }
3437
3438 void address_space_write(AddressSpace *as, hwaddr addr,
3439 const uint8_t *buf, int len)
3440 {
3441 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3442 }
3443
3444 /**
3445 * address_space_read: read from an address space.
3446 *
3447 * @as: #AddressSpace to be accessed
3448 * @addr: address within that address space
3449 * @buf: buffer with the data transferred
3450 */
3451 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3452 {
3453 address_space_rw(as, addr, buf, len, false);
3454 }
3455
3456
3457 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3458 int len, int is_write)
3459 {
3460 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
3461 }
3462
3463 /* used for ROM loading: can write in RAM and ROM */
3464 void cpu_physical_memory_write_rom(hwaddr addr,
3465 const uint8_t *buf, int len)
3466 {
3467 AddressSpaceDispatch *d = address_space_memory.dispatch;
3468 int l;
3469 uint8_t *ptr;
3470 hwaddr page;
3471 MemoryRegionSection *section;
3472
3473 while (len > 0) {
3474 page = addr & TARGET_PAGE_MASK;
3475 l = (page + TARGET_PAGE_SIZE) - addr;
3476 if (l > len)
3477 l = len;
3478 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3479
3480 if (!(memory_region_is_ram(section->mr) ||
3481 memory_region_is_romd(section->mr))) {
3482 /* do nothing */
3483 } else {
3484 unsigned long addr1;
3485 addr1 = memory_region_get_ram_addr(section->mr)
3486 + memory_region_section_addr(section, addr);
3487 /* ROM/RAM case */
3488 ptr = qemu_get_ram_ptr(addr1);
3489 memcpy(ptr, buf, l);
3490 invalidate_and_set_dirty(addr1, l);
3491 qemu_put_ram_ptr(ptr);
3492 }
3493 len -= l;
3494 buf += l;
3495 addr += l;
3496 }
3497 }
3498
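/* Bounce buffer used by address_space_map() when the target is not plain
   RAM: MMIO regions cannot be handed out as host pointers, so a single
   temporary buffer is filled with address_space_read() and flushed with
   address_space_write() on unmap.  Only one bounce mapping can exist at a
   time; the MapClient list lets callers retry once it is released. */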
3499 typedef struct {
3500 void *buffer;
3501 hwaddr addr;
3502 hwaddr len;
3503 } BounceBuffer;
3504
3505 static BounceBuffer bounce;
3506
3507 typedef struct MapClient {
3508 void *opaque;
3509 void (*callback)(void *opaque);
3510 QLIST_ENTRY(MapClient) link;
3511 } MapClient;
3512
3513 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3514 = QLIST_HEAD_INITIALIZER(map_client_list);
3515
3516 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3517 {
3518 MapClient *client = g_malloc(sizeof(*client));
3519
3520 client->opaque = opaque;
3521 client->callback = callback;
3522 QLIST_INSERT_HEAD(&map_client_list, client, link);
3523 return client;
3524 }
3525
3526 void cpu_unregister_map_client(void *_client)
3527 {
3528 MapClient *client = (MapClient *)_client;
3529
3530 QLIST_REMOVE(client, link);
3531 g_free(client);
3532 }
3533
3534 static void cpu_notify_map_clients(void)
3535 {
3536 MapClient *client;
3537
3538 while (!QLIST_EMPTY(&map_client_list)) {
3539 client = QLIST_FIRST(&map_client_list);
3540 client->callback(client->opaque);
3541 cpu_unregister_map_client(client);
3542 }
3543 }
3544
3545 /* Map a physical memory region into a host virtual address.
3546 * May map a subset of the requested range, given by and returned in *plen.
3547 * May return NULL if resources needed to perform the mapping are exhausted.
3548 * Use only for reads OR writes - not for read-modify-write operations.
3549 * Use cpu_register_map_client() to know when retrying the map operation is
3550 * likely to succeed.
3551 */
3552 void *address_space_map(AddressSpace *as,
3553 hwaddr addr,
3554 hwaddr *plen,
3555 bool is_write)
3556 {
3557 AddressSpaceDispatch *d = as->dispatch;
3558 hwaddr len = *plen;
3559 hwaddr todo = 0;
3560 int l;
3561 hwaddr page;
3562 MemoryRegionSection *section;
3563 ram_addr_t raddr = RAM_ADDR_MAX;
3564 ram_addr_t rlen;
3565 void *ret;
3566
3567 while (len > 0) {
3568 page = addr & TARGET_PAGE_MASK;
3569 l = (page + TARGET_PAGE_SIZE) - addr;
3570 if (l > len)
3571 l = len;
3572 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3573
3574 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3575 if (todo || bounce.buffer) {
3576 break;
3577 }
3578 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3579 bounce.addr = addr;
3580 bounce.len = l;
3581 if (!is_write) {
3582 address_space_read(as, addr, bounce.buffer, l);
3583 }
3584
3585 *plen = l;
3586 return bounce.buffer;
3587 }
3588 if (!todo) {
3589 raddr = memory_region_get_ram_addr(section->mr)
3590 + memory_region_section_addr(section, addr);
3591 }
3592
3593 len -= l;
3594 addr += l;
3595 todo += l;
3596 }
3597 rlen = todo;
3598 ret = qemu_ram_ptr_length(raddr, &rlen);
3599 *plen = rlen;
3600 return ret;
3601 }
3602
3603 /* Unmaps a memory region previously mapped by address_space_map().
3604 * Will also mark the memory as dirty if is_write == 1. access_len gives
3605 * the amount of memory that was actually read or written by the caller.
3606 */
3607 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3608 int is_write, hwaddr access_len)
3609 {
3610 if (buffer != bounce.buffer) {
3611 if (is_write) {
3612 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3613 while (access_len) {
3614 unsigned l;
3615 l = TARGET_PAGE_SIZE;
3616 if (l > access_len)
3617 l = access_len;
3618 invalidate_and_set_dirty(addr1, l);
3619 addr1 += l;
3620 access_len -= l;
3621 }
3622 }
3623 if (xen_enabled()) {
3624 xen_invalidate_map_cache_entry(buffer);
3625 }
3626 return;
3627 }
3628 if (is_write) {
3629 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3630 }
3631 qemu_vfree(bounce.buffer);
3632 bounce.buffer = NULL;
3633 cpu_notify_map_clients();
3634 }
3635
3636 void *cpu_physical_memory_map(hwaddr addr,
3637 hwaddr *plen,
3638 int is_write)
3639 {
3640 return address_space_map(&address_space_memory, addr, plen, is_write);
3641 }
3642
3643 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3644 int is_write, hwaddr access_len)
3645 {
3646 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3647 }
3648
3649 /* warning: addr must be aligned */
3650 static inline uint32_t ldl_phys_internal(hwaddr addr,
3651 enum device_endian endian)
3652 {
3653 uint8_t *ptr;
3654 uint32_t val;
3655 MemoryRegionSection *section;
3656
3657 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3658
3659 if (!(memory_region_is_ram(section->mr) ||
3660 memory_region_is_romd(section->mr))) {
3661 /* I/O case */
3662 addr = memory_region_section_addr(section, addr);
3663 val = io_mem_read(section->mr, addr, 4);
3664 #if defined(TARGET_WORDS_BIGENDIAN)
3665 if (endian == DEVICE_LITTLE_ENDIAN) {
3666 val = bswap32(val);
3667 }
3668 #else
3669 if (endian == DEVICE_BIG_ENDIAN) {
3670 val = bswap32(val);
3671 }
3672 #endif
3673 } else {
3674 /* RAM case */
3675 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3676 & TARGET_PAGE_MASK)
3677 + memory_region_section_addr(section, addr));
3678 switch (endian) {
3679 case DEVICE_LITTLE_ENDIAN:
3680 val = ldl_le_p(ptr);
3681 break;
3682 case DEVICE_BIG_ENDIAN:
3683 val = ldl_be_p(ptr);
3684 break;
3685 default:
3686 val = ldl_p(ptr);
3687 break;
3688 }
3689 }
3690 return val;
3691 }
3692
3693 uint32_t ldl_phys(hwaddr addr)
3694 {
3695 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3696 }
3697
3698 uint32_t ldl_le_phys(hwaddr addr)
3699 {
3700 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3701 }
3702
3703 uint32_t ldl_be_phys(hwaddr addr)
3704 {
3705 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3706 }
3707
3708 /* warning: addr must be aligned */
3709 static inline uint64_t ldq_phys_internal(hwaddr addr,
3710 enum device_endian endian)
3711 {
3712 uint8_t *ptr;
3713 uint64_t val;
3714 MemoryRegionSection *section;
3715
3716 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3717
3718 if (!(memory_region_is_ram(section->mr) ||
3719 memory_region_is_romd(section->mr))) {
3720 /* I/O case */
3721 addr = memory_region_section_addr(section, addr);
3722
3723 /* XXX This is broken when device endian != cpu endian.
3724 Fix and add "endian" variable check */
3725 #ifdef TARGET_WORDS_BIGENDIAN
3726 val = io_mem_read(section->mr, addr, 4) << 32;
3727 val |= io_mem_read(section->mr, addr + 4, 4);
3728 #else
3729 val = io_mem_read(section->mr, addr, 4);
3730 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3731 #endif
3732 } else {
3733 /* RAM case */
3734 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3735 & TARGET_PAGE_MASK)
3736 + memory_region_section_addr(section, addr));
3737 switch (endian) {
3738 case DEVICE_LITTLE_ENDIAN:
3739 val = ldq_le_p(ptr);
3740 break;
3741 case DEVICE_BIG_ENDIAN:
3742 val = ldq_be_p(ptr);
3743 break;
3744 default:
3745 val = ldq_p(ptr);
3746 break;
3747 }
3748 }
3749 return val;
3750 }
3751
3752 uint64_t ldq_phys(hwaddr addr)
3753 {
3754 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3755 }
3756
3757 uint64_t ldq_le_phys(hwaddr addr)
3758 {
3759 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3760 }
3761
3762 uint64_t ldq_be_phys(hwaddr addr)
3763 {
3764 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3765 }
3766
3767 /* XXX: optimize */
3768 uint32_t ldub_phys(hwaddr addr)
3769 {
3770 uint8_t val;
3771 cpu_physical_memory_read(addr, &val, 1);
3772 return val;
3773 }
3774
3775 /* warning: addr must be aligned */
3776 static inline uint32_t lduw_phys_internal(hwaddr addr,
3777 enum device_endian endian)
3778 {
3779 uint8_t *ptr;
3780 uint64_t val;
3781 MemoryRegionSection *section;
3782
3783 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3784
3785 if (!(memory_region_is_ram(section->mr) ||
3786 memory_region_is_romd(section->mr))) {
3787 /* I/O case */
3788 addr = memory_region_section_addr(section, addr);
3789 val = io_mem_read(section->mr, addr, 2);
3790 #if defined(TARGET_WORDS_BIGENDIAN)
3791 if (endian == DEVICE_LITTLE_ENDIAN) {
3792 val = bswap16(val);
3793 }
3794 #else
3795 if (endian == DEVICE_BIG_ENDIAN) {
3796 val = bswap16(val);
3797 }
3798 #endif
3799 } else {
3800 /* RAM case */
3801 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3802 & TARGET_PAGE_MASK)
3803 + memory_region_section_addr(section, addr));
3804 switch (endian) {
3805 case DEVICE_LITTLE_ENDIAN:
3806 val = lduw_le_p(ptr);
3807 break;
3808 case DEVICE_BIG_ENDIAN:
3809 val = lduw_be_p(ptr);
3810 break;
3811 default:
3812 val = lduw_p(ptr);
3813 break;
3814 }
3815 }
3816 return val;
3817 }
3818
3819 uint32_t lduw_phys(hwaddr addr)
3820 {
3821 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3822 }
3823
3824 uint32_t lduw_le_phys(hwaddr addr)
3825 {
3826 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3827 }
3828
3829 uint32_t lduw_be_phys(hwaddr addr)
3830 {
3831 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3832 }
3833
3834 /* warning: addr must be aligned. The ram page is not marked as dirty
3835 and the code inside is not invalidated. It is useful if the dirty
3836 bits are used to track modified PTEs */
3837 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3838 {
3839 uint8_t *ptr;
3840 MemoryRegionSection *section;
3841
3842 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3843
3844 if (!memory_region_is_ram(section->mr) || section->readonly) {
3845 addr = memory_region_section_addr(section, addr);
3846 if (memory_region_is_ram(section->mr)) {
3847 section = &phys_sections[phys_section_rom];
3848 }
3849 io_mem_write(section->mr, addr, val, 4);
3850 } else {
3851 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3852 & TARGET_PAGE_MASK)
3853 + memory_region_section_addr(section, addr);
3854 ptr = qemu_get_ram_ptr(addr1);
3855 stl_p(ptr, val);
3856
3857 if (unlikely(in_migration)) {
3858 if (!cpu_physical_memory_is_dirty(addr1)) {
3859 /* invalidate code */
3860 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3861 /* set dirty bit */
3862 cpu_physical_memory_set_dirty_flags(
3863 addr1, (0xff & ~CODE_DIRTY_FLAG));
3864 }
3865 }
3866 }
3867 }
3868
3869 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3870 {
3871 uint8_t *ptr;
3872 MemoryRegionSection *section;
3873
3874 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3875
3876 if (!memory_region_is_ram(section->mr) || section->readonly) {
3877 addr = memory_region_section_addr(section, addr);
3878 if (memory_region_is_ram(section->mr)) {
3879 section = &phys_sections[phys_section_rom];
3880 }
3881 #ifdef TARGET_WORDS_BIGENDIAN
3882 io_mem_write(section->mr, addr, val >> 32, 4);
3883 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3884 #else
3885 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3886 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3887 #endif
3888 } else {
3889 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3890 & TARGET_PAGE_MASK)
3891 + memory_region_section_addr(section, addr));
3892 stq_p(ptr, val);
3893 }
3894 }
3895
3896 /* warning: addr must be aligned */
3897 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3898 enum device_endian endian)
3899 {
3900 uint8_t *ptr;
3901 MemoryRegionSection *section;
3902
3903 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3904
3905 if (!memory_region_is_ram(section->mr) || section->readonly) {
3906 addr = memory_region_section_addr(section, addr);
3907 if (memory_region_is_ram(section->mr)) {
3908 section = &phys_sections[phys_section_rom];
3909 }
3910 #if defined(TARGET_WORDS_BIGENDIAN)
3911 if (endian == DEVICE_LITTLE_ENDIAN) {
3912 val = bswap32(val);
3913 }
3914 #else
3915 if (endian == DEVICE_BIG_ENDIAN) {
3916 val = bswap32(val);
3917 }
3918 #endif
3919 io_mem_write(section->mr, addr, val, 4);
3920 } else {
3921 unsigned long addr1;
3922 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3923 + memory_region_section_addr(section, addr);
3924 /* RAM case */
3925 ptr = qemu_get_ram_ptr(addr1);
3926 switch (endian) {
3927 case DEVICE_LITTLE_ENDIAN:
3928 stl_le_p(ptr, val);
3929 break;
3930 case DEVICE_BIG_ENDIAN:
3931 stl_be_p(ptr, val);
3932 break;
3933 default:
3934 stl_p(ptr, val);
3935 break;
3936 }
3937 invalidate_and_set_dirty(addr1, 4);
3938 }
3939 }
3940
3941 void stl_phys(hwaddr addr, uint32_t val)
3942 {
3943 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3944 }
3945
3946 void stl_le_phys(hwaddr addr, uint32_t val)
3947 {
3948 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3949 }
3950
3951 void stl_be_phys(hwaddr addr, uint32_t val)
3952 {
3953 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3954 }
3955
3956 /* XXX: optimize */
3957 void stb_phys(hwaddr addr, uint32_t val)
3958 {
3959 uint8_t v = val;
3960 cpu_physical_memory_write(addr, &v, 1);
3961 }
3962
3963 /* warning: addr must be aligned */
3964 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3965 enum device_endian endian)
3966 {
3967 uint8_t *ptr;
3968 MemoryRegionSection *section;
3969
3970 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3971
3972 if (!memory_region_is_ram(section->mr) || section->readonly) {
3973 addr = memory_region_section_addr(section, addr);
3974 if (memory_region_is_ram(section->mr)) {
3975 section = &phys_sections[phys_section_rom];
3976 }
3977 #if defined(TARGET_WORDS_BIGENDIAN)
3978 if (endian == DEVICE_LITTLE_ENDIAN) {
3979 val = bswap16(val);
3980 }
3981 #else
3982 if (endian == DEVICE_BIG_ENDIAN) {
3983 val = bswap16(val);
3984 }
3985 #endif
3986 io_mem_write(section->mr, addr, val, 2);
3987 } else {
3988 unsigned long addr1;
3989 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3990 + memory_region_section_addr(section, addr);
3991 /* RAM case */
3992 ptr = qemu_get_ram_ptr(addr1);
3993 switch (endian) {
3994 case DEVICE_LITTLE_ENDIAN:
3995 stw_le_p(ptr, val);
3996 break;
3997 case DEVICE_BIG_ENDIAN:
3998 stw_be_p(ptr, val);
3999 break;
4000 default:
4001 stw_p(ptr, val);
4002 break;
4003 }
4004 invalidate_and_set_dirty(addr1, 2);
4005 }
4006 }
4007
4008 void stw_phys(hwaddr addr, uint32_t val)
4009 {
4010 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4011 }
4012
4013 void stw_le_phys(hwaddr addr, uint32_t val)
4014 {
4015 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4016 }
4017
4018 void stw_be_phys(hwaddr addr, uint32_t val)
4019 {
4020 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4021 }
4022
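/* The st*_phys helpers above require a naturally aligned guest physical
 * address.  One way to handle a possibly unaligned 16-bit little-endian
 * store is sketched below (example_stw_le_phys_unaligned is not an
 * existing helper): pre-swap into a byte buffer and use the byte-granular
 * path, which copes with any alignment.
 */
#if 0
static void example_stw_le_phys_unaligned(hwaddr addr, uint16_t val)
{
    uint8_t buf[2];
    uint16_t tmp = cpu_to_le16(val);      /* fix the byte layout explicitly */
    memcpy(buf, &tmp, 2);
    cpu_physical_memory_write(addr, buf, 2);
}
#endif
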
4023 /* XXX: optimize */
4024 void stq_phys(hwaddr addr, uint64_t val)
4025 {
4026 val = tswap64(val);
4027 cpu_physical_memory_write(addr, &val, 8);
4028 }
4029
4030 void stq_le_phys(hwaddr addr, uint64_t val)
4031 {
4032 val = cpu_to_le64(val);
4033 cpu_physical_memory_write(addr, &val, 8);
4034 }
4035
4036 void stq_be_phys(hwaddr addr, uint64_t val)
4037 {
4038 val = cpu_to_be64(val);
4039 cpu_physical_memory_write(addr, &val, 8);
4040 }
4041
4042 /* virtual memory access for debug (includes writing to ROM) */
4043 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4044 uint8_t *buf, int len, int is_write)
4045 {
4046 int l;
4047 hwaddr phys_addr;
4048 target_ulong page;
4049
4050 while (len > 0) {
4051 page = addr & TARGET_PAGE_MASK;
4052 phys_addr = cpu_get_phys_page_debug(env, page);
4053         /* if no physical page is mapped, return an error */
4054 if (phys_addr == -1)
4055 return -1;
4056 l = (page + TARGET_PAGE_SIZE) - addr;
4057 if (l > len)
4058 l = len;
4059 phys_addr += (addr & ~TARGET_PAGE_MASK);
4060 if (is_write)
4061 cpu_physical_memory_write_rom(phys_addr, buf, l);
4062 else
4063 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4064 len -= l;
4065 buf += l;
4066 addr += l;
4067 }
4068 return 0;
4069 }
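
/* Usage sketch: a debugger front end (e.g. the gdb stub) reading guest
 * virtual memory.  env and vaddr are assumed to be supplied by the
 * caller; a negative return value means no physical page is mapped.
 */
#if 0
static void example_debug_read(CPUArchState *env, target_ulong vaddr)
{
    uint8_t buf[16];
    if (cpu_memory_rw_debug(env, vaddr, buf, sizeof(buf), 0) < 0) {
        /* no physical page mapped at vaddr: report it as unreadable */
    }
}
#endif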
4070 #endif
4071
4072 /* in deterministic execution mode, instructions that perform device I/O
4073    must be at the end of the TB */
4074 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4075 {
4076 TranslationBlock *tb;
4077 uint32_t n, cflags;
4078 target_ulong pc, cs_base;
4079 uint64_t flags;
4080
4081 tb = tb_find_pc(retaddr);
4082 if (!tb) {
4083 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4084 (void *)retaddr);
4085 }
4086 n = env->icount_decr.u16.low + tb->icount;
4087 cpu_restore_state(tb, env, retaddr);
4088 /* Calculate how many instructions had been executed before the fault
4089 occurred. */
4090 n = n - env->icount_decr.u16.low;
4091 /* Generate a new TB ending on the I/O insn. */
4092 n++;
4093 /* On MIPS and SH, delay slot instructions can only be restarted if
4094 they were already the first instruction in the TB. If this is not
4095 the first instruction in a TB then re-execute the preceding
4096 branch. */
4097 #if defined(TARGET_MIPS)
4098 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4099 env->active_tc.PC -= 4;
4100 env->icount_decr.u16.low++;
4101 env->hflags &= ~MIPS_HFLAG_BMASK;
4102 }
4103 #elif defined(TARGET_SH4)
4104 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4105 && n > 1) {
4106 env->pc -= 2;
4107 env->icount_decr.u16.low++;
4108 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4109 }
4110 #endif
4111 /* This should never happen. */
4112 if (n > CF_COUNT_MASK)
4113 cpu_abort(env, "TB too big during recompile");
4114
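    /* Regenerate the TB with an instruction count of n, so it ends on the
       I/O instruction, and with CF_LAST_IO set to flag that its last
       instruction may perform an I/O access. */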
4115 cflags = n | CF_LAST_IO;
4116 pc = tb->pc;
4117 cs_base = tb->cs_base;
4118 flags = tb->flags;
4119 tb_phys_invalidate(tb, -1);
4120 /* FIXME: In theory this could raise an exception. In practice
4121 we have already translated the block once so it's probably ok. */
4122 tb_gen_code(env, pc, cs_base, flags, cflags);
4123 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4124 the first in the TB) then we end up generating a whole new TB and
4125 repeating the fault, which is horribly inefficient.
4126 Better would be to execute just this insn uncached, or generate a
4127 second new TB. */
4128 cpu_resume_from_signal(env, NULL);
4129 }
4130
4131 #if !defined(CONFIG_USER_ONLY)
4132
4133 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4134 {
4135 int i, target_code_size, max_target_code_size;
4136 int direct_jmp_count, direct_jmp2_count, cross_page;
4137 TranslationBlock *tb;
4138
4139 target_code_size = 0;
4140 max_target_code_size = 0;
4141 cross_page = 0;
4142 direct_jmp_count = 0;
4143 direct_jmp2_count = 0;
4144     for (i = 0; i < nb_tbs; i++) {
4145 tb = &tbs[i];
4146 target_code_size += tb->size;
4147 if (tb->size > max_target_code_size)
4148 max_target_code_size = tb->size;
4149 if (tb->page_addr[1] != -1)
4150 cross_page++;
4151 if (tb->tb_next_offset[0] != 0xffff) {
4152 direct_jmp_count++;
4153 if (tb->tb_next_offset[1] != 0xffff) {
4154 direct_jmp2_count++;
4155 }
4156 }
4157 }
4158     /* XXX: avoid using doubles? */
4159 cpu_fprintf(f, "Translation buffer state:\n");
4160 cpu_fprintf(f, "gen code size %td/%zd\n",
4161 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4162 cpu_fprintf(f, "TB count %d/%d\n",
4163 nb_tbs, code_gen_max_blocks);
4164 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4165 nb_tbs ? target_code_size / nb_tbs : 0,
4166 max_target_code_size);
4167 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4168 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4169 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4170 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4171 cross_page,
4172 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4173 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4174 direct_jmp_count,
4175 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4176 direct_jmp2_count,
4177 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4178 cpu_fprintf(f, "\nStatistics:\n");
4179 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4180 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4181 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4182 tcg_dump_info(f, cpu_fprintf);
4183 }
4184
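/* Usage sketch: this dump is normally reached through the monitor's
 * "info jit" command, but any stdio stream with an fprintf-compatible
 * callback works, e.g.:
 */
#if 0
    dump_exec_info(stderr, fprintf);
#endif
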
4185 /*
4186 * A helper function for the _utterly broken_ virtio device model to find out if
4187  * it's running on a big-endian machine. Don't do this at home, kids!
4188 */
4189 bool virtio_is_big_endian(void);
4190 bool virtio_is_big_endian(void)
4191 {
4192 #if defined(TARGET_WORDS_BIGENDIAN)
4193 return true;
4194 #else
4195 return false;
4196 #endif
4197 }
4198
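/* Sketch of the kind of helper a legacy virtio backend might build on top
 * of this (example_virtio_tswap16 is hypothetical): swap only when guest
 * and host byte order differ, assuming HOST_WORDS_BIGENDIAN reflects the
 * host byte order.
 */
#if 0
static uint16_t example_virtio_tswap16(uint16_t v)
{
#if defined(HOST_WORDS_BIGENDIAN)
    return virtio_is_big_endian() ? v : bswap16(v);
#else
    return virtio_is_big_endian() ? bswap16(v) : v;
#endif
}
#endif
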
4199 #endif
4200
4201 #ifndef CONFIG_USER_ONLY
4202 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4203 {
4204 MemoryRegionSection *section;
4205
4206 section = phys_page_find(address_space_memory.dispatch,
4207 phys_addr >> TARGET_PAGE_BITS);
4208
4209 return !(memory_region_is_ram(section->mr) ||
4210 memory_region_is_romd(section->mr));
4211 }
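
/* Usage sketch: a hypothetical device model that refuses to DMA into
 * anything that is not plain RAM (or a ROM device in ROMD mode).
 * example_dma_allowed() is not an existing helper.
 */
#if 0
static bool example_dma_allowed(hwaddr pa)
{
    return !cpu_physical_memory_is_io(pa);
}
#endif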
4212 #endif