qemu.git / exec.c (commit: exec: make some functions static)
1 /*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
59
60 #include "cputlb.h"
61
62 #include "memory-internal.h"
63
64 //#define DEBUG_TB_INVALIDATE
65 //#define DEBUG_FLUSH
66 //#define DEBUG_UNASSIGNED
67
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
73
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
78
79 #define SMC_BITMAP_USE_THRESHOLD 10
80
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
87
88 uint8_t *code_gen_prologue;
89 static uint8_t *code_gen_buffer;
90 static size_t code_gen_buffer_size;
91 /* threshold to flush the translated code buffer */
92 static size_t code_gen_buffer_max_size;
93 static uint8_t *code_gen_ptr;
94
95 #if !defined(CONFIG_USER_ONLY)
96 int phys_ram_fd;
97 static int in_migration;
98
99 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
100
101 static MemoryRegion *system_memory;
102 static MemoryRegion *system_io;
103
104 AddressSpace address_space_io;
105 AddressSpace address_space_memory;
106
107 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
108 static MemoryRegion io_mem_subpage_ram;
109
110 #endif
111
112 CPUArchState *first_cpu;
113 /* current CPU in the current thread. It is only valid inside
114 cpu_exec() */
115 DEFINE_TLS(CPUArchState *,cpu_single_env);
116 /* 0 = Do not count executed instructions.
117 1 = Precise instruction counting.
118 2 = Adaptive rate instruction counting. */
119 int use_icount = 0;
120
121 typedef struct PageDesc {
122 /* list of TBs intersecting this ram page */
123 TranslationBlock *first_tb;
124 /* in order to optimize self-modifying code, we count the number of
125 write accesses made to a given page so we can switch to a bitmap */
126 unsigned int code_write_count;
127 uint8_t *code_bitmap;
128 #if defined(CONFIG_USER_ONLY)
129 unsigned long flags;
130 #endif
131 } PageDesc;
132
133 /* In system mode we want L1_MAP to be based on ram offsets,
134 while in user mode we want it to be based on virtual addresses. */
135 #if !defined(CONFIG_USER_ONLY)
136 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
137 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
138 #else
139 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
140 #endif
141 #else
142 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
143 #endif
144
145 /* Size of the L2 (and L3, etc) page tables. */
146 #define L2_BITS 10
147 #define L2_SIZE (1 << L2_BITS)
148
149 #define P_L2_LEVELS \
150 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
151
152 /* The bits remaining after N lower levels of page tables. */
153 #define V_L1_BITS_REM \
154 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
155
156 #if V_L1_BITS_REM < 4
157 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
158 #else
159 #define V_L1_BITS V_L1_BITS_REM
160 #endif
161
162 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
163
164 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
165
166 uintptr_t qemu_real_host_page_size;
167 uintptr_t qemu_host_page_size;
168 uintptr_t qemu_host_page_mask;
169
170 /* This is a multi-level map on the virtual address space.
171 The bottom level has pointers to PageDesc. */
172 static void *l1_map[V_L1_SIZE];
173
174 #if !defined(CONFIG_USER_ONLY)
175
176 static MemoryRegionSection *phys_sections;
177 static unsigned phys_sections_nb, phys_sections_nb_alloc;
178 static uint16_t phys_section_unassigned;
179 static uint16_t phys_section_notdirty;
180 static uint16_t phys_section_rom;
181 static uint16_t phys_section_watch;
182
183 /* Simple allocator for PhysPageEntry nodes */
184 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
185 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
186
187 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
188
189 static void io_mem_init(void);
190 static void memory_map_init(void);
191 static void *qemu_safe_ram_ptr(ram_addr_t addr);
192
193 static MemoryRegion io_mem_watch;
194 #endif
195 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
196 tb_page_addr_t phys_page2);
197
198 /* statistics */
199 static int tb_flush_count;
200 static int tb_phys_invalidate_count;
201
202 #ifdef _WIN32
203 static inline void map_exec(void *addr, long size)
204 {
205 DWORD old_protect;
206 VirtualProtect(addr, size,
207 PAGE_EXECUTE_READWRITE, &old_protect);
208
209 }
210 #else
211 static inline void map_exec(void *addr, long size)
212 {
213 unsigned long start, end, page_size;
214
215 page_size = getpagesize();
216 start = (unsigned long)addr;
217 start &= ~(page_size - 1);
218
219 end = (unsigned long)addr + size;
220 end += page_size - 1;
221 end &= ~(page_size - 1);
222
223 mprotect((void *)start, end - start,
224 PROT_READ | PROT_WRITE | PROT_EXEC);
225 }
226 #endif
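/*
 * Worked example for the POSIX map_exec() above (added annotation, not part
 * of the original file; assumes a 4 KiB host page size):
 *
 *     map_exec((void *)0x1234, 0x100);
 *
 * start = 0x1234 & ~0xfff = 0x1000 and end = (0x1334 + 0xfff) & ~0xfff =
 * 0x2000, so the whole page containing the buffer is remapped with
 * mprotect((void *)0x1000, 0x1000, PROT_READ | PROT_WRITE | PROT_EXEC).
 */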
227
228 static void page_init(void)
229 {
230 /* NOTE: we can always suppose that qemu_host_page_size >=
231 TARGET_PAGE_SIZE */
232 #ifdef _WIN32
233 {
234 SYSTEM_INFO system_info;
235
236 GetSystemInfo(&system_info);
237 qemu_real_host_page_size = system_info.dwPageSize;
238 }
239 #else
240 qemu_real_host_page_size = getpagesize();
241 #endif
242 if (qemu_host_page_size == 0)
243 qemu_host_page_size = qemu_real_host_page_size;
244 if (qemu_host_page_size < TARGET_PAGE_SIZE)
245 qemu_host_page_size = TARGET_PAGE_SIZE;
246 qemu_host_page_mask = ~(qemu_host_page_size - 1);
247
248 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
249 {
250 #ifdef HAVE_KINFO_GETVMMAP
251 struct kinfo_vmentry *freep;
252 int i, cnt;
253
254 freep = kinfo_getvmmap(getpid(), &cnt);
255 if (freep) {
256 mmap_lock();
257 for (i = 0; i < cnt; i++) {
258 unsigned long startaddr, endaddr;
259
260 startaddr = freep[i].kve_start;
261 endaddr = freep[i].kve_end;
262 if (h2g_valid(startaddr)) {
263 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
264
265 if (h2g_valid(endaddr)) {
266 endaddr = h2g(endaddr);
267 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
268 } else {
269 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
270 endaddr = ~0ul;
271 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
272 #endif
273 }
274 }
275 }
276 free(freep);
277 mmap_unlock();
278 }
279 #else
280 FILE *f;
281
282 last_brk = (unsigned long)sbrk(0);
283
284 f = fopen("/compat/linux/proc/self/maps", "r");
285 if (f) {
286 mmap_lock();
287
288 do {
289 unsigned long startaddr, endaddr;
290 int n;
291
292 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
293
294 if (n == 2 && h2g_valid(startaddr)) {
295 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
296
297 if (h2g_valid(endaddr)) {
298 endaddr = h2g(endaddr);
299 } else {
300 endaddr = ~0ul;
301 }
302 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
303 }
304 } while (!feof(f));
305
306 fclose(f);
307 mmap_unlock();
308 }
309 #endif
310 }
311 #endif
312 }
313
314 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
315 {
316 PageDesc *pd;
317 void **lp;
318 int i;
319
320 #if defined(CONFIG_USER_ONLY)
321 /* We can't use g_malloc because it may recurse into a locked mutex. */
322 # define ALLOC(P, SIZE) \
323 do { \
324 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
325 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
326 } while (0)
327 #else
328 # define ALLOC(P, SIZE) \
329 do { P = g_malloc0(SIZE); } while (0)
330 #endif
331
332 /* Level 1. Always allocated. */
333 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
334
335 /* Level 2..N-1. */
336 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
337 void **p = *lp;
338
339 if (p == NULL) {
340 if (!alloc) {
341 return NULL;
342 }
343 ALLOC(p, sizeof(void *) * L2_SIZE);
344 *lp = p;
345 }
346
347 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
348 }
349
350 pd = *lp;
351 if (pd == NULL) {
352 if (!alloc) {
353 return NULL;
354 }
355 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
356 *lp = pd;
357 }
358
359 #undef ALLOC
360
361 return pd + (index & (L2_SIZE - 1));
362 }
363
364 static inline PageDesc *page_find(tb_page_addr_t index)
365 {
366 return page_find_alloc(index, 0);
367 }
368
369 #if !defined(CONFIG_USER_ONLY)
370
371 static void phys_map_node_reserve(unsigned nodes)
372 {
373 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
374 typedef PhysPageEntry Node[L2_SIZE];
375 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
376 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
377 phys_map_nodes_nb + nodes);
378 phys_map_nodes = g_renew(Node, phys_map_nodes,
379 phys_map_nodes_nb_alloc);
380 }
381 }
382
383 static uint16_t phys_map_node_alloc(void)
384 {
385 unsigned i;
386 uint16_t ret;
387
388 ret = phys_map_nodes_nb++;
389 assert(ret != PHYS_MAP_NODE_NIL);
390 assert(ret != phys_map_nodes_nb_alloc);
391 for (i = 0; i < L2_SIZE; ++i) {
392 phys_map_nodes[ret][i].is_leaf = 0;
393 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
394 }
395 return ret;
396 }
397
398 static void phys_map_nodes_reset(void)
399 {
400 phys_map_nodes_nb = 0;
401 }
402
403
404 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
405 hwaddr *nb, uint16_t leaf,
406 int level)
407 {
408 PhysPageEntry *p;
409 int i;
410 hwaddr step = (hwaddr)1 << (level * L2_BITS);
411
412 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
413 lp->ptr = phys_map_node_alloc();
414 p = phys_map_nodes[lp->ptr];
415 if (level == 0) {
416 for (i = 0; i < L2_SIZE; i++) {
417 p[i].is_leaf = 1;
418 p[i].ptr = phys_section_unassigned;
419 }
420 }
421 } else {
422 p = phys_map_nodes[lp->ptr];
423 }
424 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
425
426 while (*nb && lp < &p[L2_SIZE]) {
427 if ((*index & (step - 1)) == 0 && *nb >= step) {
428 lp->is_leaf = true;
429 lp->ptr = leaf;
430 *index += step;
431 *nb -= step;
432 } else {
433 phys_page_set_level(lp, index, nb, leaf, level - 1);
434 }
435 ++lp;
436 }
437 }
438
439 static void phys_page_set(AddressSpaceDispatch *d,
440 hwaddr index, hwaddr nb,
441 uint16_t leaf)
442 {
443 /* Wildly overreserve - it doesn't matter much. */
444 phys_map_node_reserve(3 * P_L2_LEVELS);
445
446 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
447 }
448
449 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
450 {
451 PhysPageEntry lp = d->phys_map;
452 PhysPageEntry *p;
453 int i;
454 uint16_t s_index = phys_section_unassigned;
455
456 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
457 if (lp.ptr == PHYS_MAP_NODE_NIL) {
458 goto not_found;
459 }
460 p = phys_map_nodes[lp.ptr];
461 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
462 }
463
464 s_index = lp.ptr;
465 not_found:
466 return &phys_sections[s_index];
467 }
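/*
 * Usage sketch (added annotation, not part of the original file): code that
 * builds the dispatch tree registers a section covering 'nb' pages and later
 * looks a page back up by its physical page index, roughly:
 *
 *     phys_page_set(d, base >> TARGET_PAGE_BITS,
 *                   size >> TARGET_PAGE_BITS, section_index);
 *     MemoryRegionSection *s = phys_page_find(d, addr >> TARGET_PAGE_BITS);
 *
 * Indexes that were never set fall through to
 * &phys_sections[phys_section_unassigned].
 */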
468
469 bool memory_region_is_unassigned(MemoryRegion *mr)
470 {
471 return mr != &io_mem_ram && mr != &io_mem_rom
472 && mr != &io_mem_notdirty && !mr->rom_device
473 && mr != &io_mem_watch;
474 }
475
476 #define mmap_lock() do { } while(0)
477 #define mmap_unlock() do { } while(0)
478 #endif
479
480 #if defined(CONFIG_USER_ONLY)
481 /* Currently it is not recommended to allocate big chunks of data in
482 user mode. This will change once a dedicated libc is used. */
483 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
484 region in which the guest needs to run. Revisit this. */
485 #define USE_STATIC_CODE_GEN_BUFFER
486 #endif
487
488 /* ??? Should configure for this, not list operating systems here. */
489 #if (defined(__linux__) \
490 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
491 || defined(__DragonFly__) || defined(__OpenBSD__) \
492 || defined(__NetBSD__))
493 # define USE_MMAP
494 #endif
495
496 /* Minimum size of the code gen buffer. This number is arbitrarily chosen,
497 but not so small that we can't keep a fair number of TBs live. */
498 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
499
500 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
501 indicated, this is constrained by the range of direct branches on the
502 host cpu, as used by the TCG implementation of goto_tb. */
503 #if defined(__x86_64__)
504 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
505 #elif defined(__sparc__)
506 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
507 #elif defined(__arm__)
508 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
509 #elif defined(__s390x__)
510 /* We have a +- 4GB range on the branches; leave some slop. */
511 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
512 #else
513 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
514 #endif
515
516 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
517
518 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
519 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
520 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
521
522 static inline size_t size_code_gen_buffer(size_t tb_size)
523 {
524 /* Size the buffer. */
525 if (tb_size == 0) {
526 #ifdef USE_STATIC_CODE_GEN_BUFFER
527 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
528 #else
529 /* ??? Needs adjustments. */
530 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
531 static buffer, we could size this on RESERVED_VA, on the text
532 segment size of the executable, or continue to use the default. */
533 tb_size = (unsigned long)(ram_size / 4);
534 #endif
535 }
536 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
537 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
538 }
539 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
540 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
541 }
542 code_gen_buffer_size = tb_size;
543 return tb_size;
544 }
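/*
 * Worked examples for size_code_gen_buffer() (added annotation, not part of
 * the original file; limits assume an x86_64 host):
 *
 *     size_code_gen_buffer(0);
 *         default: DEFAULT_CODE_GEN_BUFFER_SIZE (32 MiB) with the static
 *         buffer, otherwise ram_size / 4, clamped to the limits below
 *     size_code_gen_buffer(512 * 1024);
 *         raised to MIN_CODE_GEN_BUFFER_SIZE (1 MiB)
 *     size_code_gen_buffer(4ul << 30);
 *         clamped to MAX_CODE_GEN_BUFFER_SIZE (2 GiB on x86_64)
 *
 * The chosen size is also stored in code_gen_buffer_size as a side effect.
 */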
545
546 #ifdef USE_STATIC_CODE_GEN_BUFFER
547 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
548 __attribute__((aligned(CODE_GEN_ALIGN)));
549
550 static inline void *alloc_code_gen_buffer(void)
551 {
552 map_exec(static_code_gen_buffer, code_gen_buffer_size);
553 return static_code_gen_buffer;
554 }
555 #elif defined(USE_MMAP)
556 static inline void *alloc_code_gen_buffer(void)
557 {
558 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
559 uintptr_t start = 0;
560 void *buf;
561
562 /* Constrain the position of the buffer based on the host cpu.
563 Note that these addresses are chosen in concert with the
564 addresses assigned in the relevant linker script file. */
565 # if defined(__PIE__) || defined(__PIC__)
566 /* Don't bother setting a preferred location if we're building
567 a position-independent executable. We're more likely to get
568 an address near the main executable if we let the kernel
569 choose the address. */
570 # elif defined(__x86_64__) && defined(MAP_32BIT)
571 /* Force the memory down into low memory with the executable.
572 Leave the choice of exact location with the kernel. */
573 flags |= MAP_32BIT;
574 /* Cannot expect to map more than 800MB in low memory. */
575 if (code_gen_buffer_size > 800u * 1024 * 1024) {
576 code_gen_buffer_size = 800u * 1024 * 1024;
577 }
578 # elif defined(__sparc__)
579 start = 0x40000000ul;
580 # elif defined(__s390x__)
581 start = 0x90000000ul;
582 # endif
583
584 buf = mmap((void *)start, code_gen_buffer_size,
585 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
586 return buf == MAP_FAILED ? NULL : buf;
587 }
588 #else
589 static inline void *alloc_code_gen_buffer(void)
590 {
591 void *buf = g_malloc(code_gen_buffer_size);
592 if (buf) {
593 map_exec(buf, code_gen_buffer_size);
594 }
595 return buf;
596 }
597 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
598
599 static inline void code_gen_alloc(size_t tb_size)
600 {
601 code_gen_buffer_size = size_code_gen_buffer(tb_size);
602 code_gen_buffer = alloc_code_gen_buffer();
603 if (code_gen_buffer == NULL) {
604 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
605 exit(1);
606 }
607
608 /* Steal room for the prologue at the end of the buffer. This ensures
609 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
610 from TB's to the prologue are going to be in range. It also means
611 that we don't need to mark (additional) portions of the data segment
612 as executable. */
613 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
614 code_gen_buffer_size -= 1024;
615
616 code_gen_buffer_max_size = code_gen_buffer_size -
617 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
618 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
619 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
620 }
621
622 /* Must be called before using the QEMU cpus. 'tb_size' is the size
623 (in bytes) allocated to the translation buffer. Zero means default
624 size. */
625 void tcg_exec_init(unsigned long tb_size)
626 {
627 cpu_gen_init();
628 code_gen_alloc(tb_size);
629 code_gen_ptr = code_gen_buffer;
630 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
631 page_init();
632 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
633 /* There's no guest base to take into account, so go ahead and
634 initialize the prologue now. */
635 tcg_prologue_init(&tcg_ctx);
636 #endif
637 }
638
639 bool tcg_enabled(void)
640 {
641 return code_gen_buffer != NULL;
642 }
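/*
 * Initialization sketch (added annotation, not part of the original file):
 * QEMU's startup code calls tcg_exec_init() once before any translation, and
 * tcg_enabled() reports whether that happened (e.g. KVM-only runs skip it):
 *
 *     tcg_exec_init(0);
 *         0 selects the default code buffer size
 *     assert(tcg_enabled());
 */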
643
644 void cpu_exec_init_all(void)
645 {
646 #if !defined(CONFIG_USER_ONLY)
647 memory_map_init();
648 io_mem_init();
649 #endif
650 }
651
652 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
653
654 static int cpu_common_post_load(void *opaque, int version_id)
655 {
656 CPUArchState *env = opaque;
657
658 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
659 version_id is increased. */
660 env->interrupt_request &= ~0x01;
661 tlb_flush(env, 1);
662
663 return 0;
664 }
665
666 static const VMStateDescription vmstate_cpu_common = {
667 .name = "cpu_common",
668 .version_id = 1,
669 .minimum_version_id = 1,
670 .minimum_version_id_old = 1,
671 .post_load = cpu_common_post_load,
672 .fields = (VMStateField []) {
673 VMSTATE_UINT32(halted, CPUArchState),
674 VMSTATE_UINT32(interrupt_request, CPUArchState),
675 VMSTATE_END_OF_LIST()
676 }
677 };
678 #endif
679
680 CPUArchState *qemu_get_cpu(int cpu)
681 {
682 CPUArchState *env = first_cpu;
683
684 while (env) {
685 if (env->cpu_index == cpu)
686 break;
687 env = env->next_cpu;
688 }
689
690 return env;
691 }
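/*
 * Usage sketch (added annotation, not part of the original file): CPUs are
 * kept on the singly linked first_cpu list in creation order, so the first
 * CPU created has cpu_index 0:
 *
 *     CPUArchState *env = qemu_get_cpu(0);
 *     assert(env == NULL || env->cpu_index == 0);
 */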
692
693 void cpu_exec_init(CPUArchState *env)
694 {
695 CPUArchState **penv;
696 int cpu_index;
697
698 #if defined(CONFIG_USER_ONLY)
699 cpu_list_lock();
700 #endif
701 env->next_cpu = NULL;
702 penv = &first_cpu;
703 cpu_index = 0;
704 while (*penv != NULL) {
705 penv = &(*penv)->next_cpu;
706 cpu_index++;
707 }
708 env->cpu_index = cpu_index;
709 env->numa_node = 0;
710 QTAILQ_INIT(&env->breakpoints);
711 QTAILQ_INIT(&env->watchpoints);
712 #ifndef CONFIG_USER_ONLY
713 env->thread_id = qemu_get_thread_id();
714 #endif
715 *penv = env;
716 #if defined(CONFIG_USER_ONLY)
717 cpu_list_unlock();
718 #endif
719 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
720 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
721 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
722 cpu_save, cpu_load, env);
723 #endif
724 }
725
726 /* Allocate a new translation block. Flush the translation buffer if
727 too many translation blocks or too much generated code. */
728 static TranslationBlock *tb_alloc(target_ulong pc)
729 {
730 TranslationBlock *tb;
731
732 if (nb_tbs >= code_gen_max_blocks ||
733 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
734 return NULL;
735 tb = &tbs[nb_tbs++];
736 tb->pc = pc;
737 tb->cflags = 0;
738 return tb;
739 }
740
741 void tb_free(TranslationBlock *tb)
742 {
743 /* In practice this is mostly used for single-use temporary TBs.
744 Ignore the hard cases and just back up if this TB happens to
745 be the last one generated. */
746 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
747 code_gen_ptr = tb->tc_ptr;
748 nb_tbs--;
749 }
750 }
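/*
 * Usage sketch (added annotation, not part of the original file): callers
 * treat a NULL return from tb_alloc() as "translation buffer full" and retry
 * after a flush, exactly as tb_gen_code() does further down:
 *
 *     tb = tb_alloc(pc);
 *     if (!tb) {
 *         tb_flush(env);
 *         tb = tb_alloc(pc);
 *     }
 *
 * The second tb_alloc() cannot fail right after a flush.
 */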
751
752 static inline void invalidate_page_bitmap(PageDesc *p)
753 {
754 if (p->code_bitmap) {
755 g_free(p->code_bitmap);
756 p->code_bitmap = NULL;
757 }
758 p->code_write_count = 0;
759 }
760
761 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
762
763 static void page_flush_tb_1 (int level, void **lp)
764 {
765 int i;
766
767 if (*lp == NULL) {
768 return;
769 }
770 if (level == 0) {
771 PageDesc *pd = *lp;
772 for (i = 0; i < L2_SIZE; ++i) {
773 pd[i].first_tb = NULL;
774 invalidate_page_bitmap(pd + i);
775 }
776 } else {
777 void **pp = *lp;
778 for (i = 0; i < L2_SIZE; ++i) {
779 page_flush_tb_1 (level - 1, pp + i);
780 }
781 }
782 }
783
784 static void page_flush_tb(void)
785 {
786 int i;
787 for (i = 0; i < V_L1_SIZE; i++) {
788 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
789 }
790 }
791
792 /* flush all the translation blocks */
793 /* XXX: tb_flush is currently not thread safe */
794 void tb_flush(CPUArchState *env1)
795 {
796 CPUArchState *env;
797 #if defined(DEBUG_FLUSH)
798 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
799 (unsigned long)(code_gen_ptr - code_gen_buffer),
800 nb_tbs, nb_tbs > 0 ?
801 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
802 #endif
803 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
804 cpu_abort(env1, "Internal error: code buffer overflow\n");
805
806 nb_tbs = 0;
807
808 for(env = first_cpu; env != NULL; env = env->next_cpu) {
809 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
810 }
811
812 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
813 page_flush_tb();
814
815 code_gen_ptr = code_gen_buffer;
816 /* XXX: flush processor icache at this point if cache flush is
817 expensive */
818 tb_flush_count++;
819 }
820
821 #ifdef DEBUG_TB_CHECK
822
823 static void tb_invalidate_check(target_ulong address)
824 {
825 TranslationBlock *tb;
826 int i;
827 address &= TARGET_PAGE_MASK;
828 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
829 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
830 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
831 address >= tb->pc + tb->size)) {
832 printf("ERROR invalidate: address=" TARGET_FMT_lx
833 " PC=%08lx size=%04x\n",
834 address, (long)tb->pc, tb->size);
835 }
836 }
837 }
838 }
839
840 /* verify that all the pages have correct rights for code */
841 static void tb_page_check(void)
842 {
843 TranslationBlock *tb;
844 int i, flags1, flags2;
845
846 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
847 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
848 flags1 = page_get_flags(tb->pc);
849 flags2 = page_get_flags(tb->pc + tb->size - 1);
850 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
851 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
852 (long)tb->pc, tb->size, flags1, flags2);
853 }
854 }
855 }
856 }
857
858 #endif
859
860 /* invalidate one TB */
861 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
862 int next_offset)
863 {
864 TranslationBlock *tb1;
865 for(;;) {
866 tb1 = *ptb;
867 if (tb1 == tb) {
868 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
869 break;
870 }
871 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
872 }
873 }
874
875 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
876 {
877 TranslationBlock *tb1;
878 unsigned int n1;
879
880 for(;;) {
881 tb1 = *ptb;
882 n1 = (uintptr_t)tb1 & 3;
883 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
884 if (tb1 == tb) {
885 *ptb = tb1->page_next[n1];
886 break;
887 }
888 ptb = &tb1->page_next[n1];
889 }
890 }
891
892 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
893 {
894 TranslationBlock *tb1, **ptb;
895 unsigned int n1;
896
897 ptb = &tb->jmp_next[n];
898 tb1 = *ptb;
899 if (tb1) {
900 /* find tb(n) in circular list */
901 for(;;) {
902 tb1 = *ptb;
903 n1 = (uintptr_t)tb1 & 3;
904 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
905 if (n1 == n && tb1 == tb)
906 break;
907 if (n1 == 2) {
908 ptb = &tb1->jmp_first;
909 } else {
910 ptb = &tb1->jmp_next[n1];
911 }
912 }
913 /* now we can remove tb(n) from the list */
914 *ptb = tb->jmp_next[n];
915
916 tb->jmp_next[n] = NULL;
917 }
918 }
919
920 /* reset the jump entry 'n' of a TB so that it is not chained to
921 another TB */
922 static inline void tb_reset_jump(TranslationBlock *tb, int n)
923 {
924 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
925 }
926
927 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
928 {
929 CPUArchState *env;
930 PageDesc *p;
931 unsigned int h, n1;
932 tb_page_addr_t phys_pc;
933 TranslationBlock *tb1, *tb2;
934
935 /* remove the TB from the hash list */
936 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
937 h = tb_phys_hash_func(phys_pc);
938 tb_remove(&tb_phys_hash[h], tb,
939 offsetof(TranslationBlock, phys_hash_next));
940
941 /* remove the TB from the page list */
942 if (tb->page_addr[0] != page_addr) {
943 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
944 tb_page_remove(&p->first_tb, tb);
945 invalidate_page_bitmap(p);
946 }
947 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
948 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
949 tb_page_remove(&p->first_tb, tb);
950 invalidate_page_bitmap(p);
951 }
952
953 tb_invalidated_flag = 1;
954
955 /* remove the TB from each CPU's jump cache */
956 h = tb_jmp_cache_hash_func(tb->pc);
957 for(env = first_cpu; env != NULL; env = env->next_cpu) {
958 if (env->tb_jmp_cache[h] == tb)
959 env->tb_jmp_cache[h] = NULL;
960 }
961
962 /* remove this TB from the two jump lists */
963 tb_jmp_remove(tb, 0);
964 tb_jmp_remove(tb, 1);
965
966 /* suppress any remaining jumps to this TB */
967 tb1 = tb->jmp_first;
968 for(;;) {
969 n1 = (uintptr_t)tb1 & 3;
970 if (n1 == 2)
971 break;
972 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
973 tb2 = tb1->jmp_next[n1];
974 tb_reset_jump(tb1, n1);
975 tb1->jmp_next[n1] = NULL;
976 tb1 = tb2;
977 }
978 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
979
980 tb_phys_invalidate_count++;
981 }
982
983 static inline void set_bits(uint8_t *tab, int start, int len)
984 {
985 int end, mask, end1;
986
987 end = start + len;
988 tab += start >> 3;
989 mask = 0xff << (start & 7);
990 if ((start & ~7) == (end & ~7)) {
991 if (start < end) {
992 mask &= ~(0xff << (end & 7));
993 *tab |= mask;
994 }
995 } else {
996 *tab++ |= mask;
997 start = (start + 8) & ~7;
998 end1 = end & ~7;
999 while (start < end1) {
1000 *tab++ = 0xff;
1001 start += 8;
1002 }
1003 if (start < end) {
1004 mask = ~(0xff << (end & 7));
1005 *tab |= mask;
1006 }
1007 }
1008 }
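/*
 * Worked example for set_bits() (added annotation, not part of the original
 * file): set_bits(tab, 10, 6) marks bits 10..15. start >> 3 selects tab[1],
 * the initial mask is 0xff << 2 = 0xfc, and since the run ends on a byte
 * boundary no trailing partial byte is needed, so the net effect is
 *
 *     tab[1] |= 0xfc;
 *
 * build_page_bitmap() below uses this to record which bytes of a page are
 * covered by translated code.
 */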
1009
1010 static void build_page_bitmap(PageDesc *p)
1011 {
1012 int n, tb_start, tb_end;
1013 TranslationBlock *tb;
1014
1015 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1016
1017 tb = p->first_tb;
1018 while (tb != NULL) {
1019 n = (uintptr_t)tb & 3;
1020 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1021 /* NOTE: this is subtle as a TB may span two physical pages */
1022 if (n == 0) {
1023 /* NOTE: tb_end may be after the end of the page, but
1024 it is not a problem */
1025 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1026 tb_end = tb_start + tb->size;
1027 if (tb_end > TARGET_PAGE_SIZE)
1028 tb_end = TARGET_PAGE_SIZE;
1029 } else {
1030 tb_start = 0;
1031 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1032 }
1033 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1034 tb = tb->page_next[n];
1035 }
1036 }
1037
1038 TranslationBlock *tb_gen_code(CPUArchState *env,
1039 target_ulong pc, target_ulong cs_base,
1040 int flags, int cflags)
1041 {
1042 TranslationBlock *tb;
1043 uint8_t *tc_ptr;
1044 tb_page_addr_t phys_pc, phys_page2;
1045 target_ulong virt_page2;
1046 int code_gen_size;
1047
1048 phys_pc = get_page_addr_code(env, pc);
1049 tb = tb_alloc(pc);
1050 if (!tb) {
1051 /* flush must be done */
1052 tb_flush(env);
1053 /* cannot fail at this point */
1054 tb = tb_alloc(pc);
1055 /* Don't forget to invalidate previous TB info. */
1056 tb_invalidated_flag = 1;
1057 }
1058 tc_ptr = code_gen_ptr;
1059 tb->tc_ptr = tc_ptr;
1060 tb->cs_base = cs_base;
1061 tb->flags = flags;
1062 tb->cflags = cflags;
1063 cpu_gen_code(env, tb, &code_gen_size);
1064 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1065 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1066
1067 /* check next page if needed */
1068 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1069 phys_page2 = -1;
1070 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1071 phys_page2 = get_page_addr_code(env, virt_page2);
1072 }
1073 tb_link_page(tb, phys_pc, phys_page2);
1074 return tb;
1075 }
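/*
 * Usage sketch (added annotation, not part of the original file): the
 * execution loop in cpu-exec.c generates ordinary blocks with cflags 0,
 * while the self-modifying-code handlers below pass cflags 1 so that the
 * replacement block contains a single instruction:
 *
 *     tb = tb_gen_code(env, pc, cs_base, flags, 0);
 *
 *     tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
 */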
1076
1077 /*
1078 * Invalidate all TBs which intersect with the target physical address range
1079 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1080 * 'is_cpu_write_access' should be true if called from a real cpu write
1081 * access: the virtual CPU will exit the current TB if code is modified inside
1082 * this TB.
1083 */
1084 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1085 int is_cpu_write_access)
1086 {
1087 while (start < end) {
1088 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1089 start &= TARGET_PAGE_MASK;
1090 start += TARGET_PAGE_SIZE;
1091 }
1092 }
1093
1094 /*
1095 * Invalidate all TBs which intersect with the target physical address range
1096 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1097 * 'is_cpu_write_access' should be true if called from a real cpu write
1098 * access: the virtual CPU will exit the current TB if code is modified inside
1099 * this TB.
1100 */
1101 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1102 int is_cpu_write_access)
1103 {
1104 TranslationBlock *tb, *tb_next, *saved_tb;
1105 CPUArchState *env = cpu_single_env;
1106 tb_page_addr_t tb_start, tb_end;
1107 PageDesc *p;
1108 int n;
1109 #ifdef TARGET_HAS_PRECISE_SMC
1110 int current_tb_not_found = is_cpu_write_access;
1111 TranslationBlock *current_tb = NULL;
1112 int current_tb_modified = 0;
1113 target_ulong current_pc = 0;
1114 target_ulong current_cs_base = 0;
1115 int current_flags = 0;
1116 #endif /* TARGET_HAS_PRECISE_SMC */
1117
1118 p = page_find(start >> TARGET_PAGE_BITS);
1119 if (!p)
1120 return;
1121 if (!p->code_bitmap &&
1122 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1123 is_cpu_write_access) {
1124 /* build code bitmap */
1125 build_page_bitmap(p);
1126 }
1127
1128 /* we remove all the TBs in the range [start, end[ */
1129 /* XXX: see if in some cases it could be faster to invalidate all the code */
1130 tb = p->first_tb;
1131 while (tb != NULL) {
1132 n = (uintptr_t)tb & 3;
1133 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1134 tb_next = tb->page_next[n];
1135 /* NOTE: this is subtle as a TB may span two physical pages */
1136 if (n == 0) {
1137 /* NOTE: tb_end may be after the end of the page, but
1138 it is not a problem */
1139 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1140 tb_end = tb_start + tb->size;
1141 } else {
1142 tb_start = tb->page_addr[1];
1143 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1144 }
1145 if (!(tb_end <= start || tb_start >= end)) {
1146 #ifdef TARGET_HAS_PRECISE_SMC
1147 if (current_tb_not_found) {
1148 current_tb_not_found = 0;
1149 current_tb = NULL;
1150 if (env->mem_io_pc) {
1151 /* now we have a real cpu fault */
1152 current_tb = tb_find_pc(env->mem_io_pc);
1153 }
1154 }
1155 if (current_tb == tb &&
1156 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1157 /* If we are modifying the current TB, we must stop
1158 its execution. We could be more precise by checking
1159 that the modification is after the current PC, but it
1160 would require a specialized function to partially
1161 restore the CPU state */
1162
1163 current_tb_modified = 1;
1164 cpu_restore_state(current_tb, env, env->mem_io_pc);
1165 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1166 &current_flags);
1167 }
1168 #endif /* TARGET_HAS_PRECISE_SMC */
1169 /* we need to do that to handle the case where a signal
1170 occurs while doing tb_phys_invalidate() */
1171 saved_tb = NULL;
1172 if (env) {
1173 saved_tb = env->current_tb;
1174 env->current_tb = NULL;
1175 }
1176 tb_phys_invalidate(tb, -1);
1177 if (env) {
1178 env->current_tb = saved_tb;
1179 if (env->interrupt_request && env->current_tb)
1180 cpu_interrupt(env, env->interrupt_request);
1181 }
1182 }
1183 tb = tb_next;
1184 }
1185 #if !defined(CONFIG_USER_ONLY)
1186 /* if no code remaining, no need to continue to use slow writes */
1187 if (!p->first_tb) {
1188 invalidate_page_bitmap(p);
1189 if (is_cpu_write_access) {
1190 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1191 }
1192 }
1193 #endif
1194 #ifdef TARGET_HAS_PRECISE_SMC
1195 if (current_tb_modified) {
1196 /* we generate a block containing just the instruction
1197 modifying the memory. It will ensure that it cannot modify
1198 itself */
1199 env->current_tb = NULL;
1200 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1201 cpu_resume_from_signal(env, NULL);
1202 }
1203 #endif
1204 }
1205
1206 /* len must be <= 8 and start must be a multiple of len */
1207 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1208 {
1209 PageDesc *p;
1210 int offset, b;
1211 #if 0
1212 if (1) {
1213 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1214 cpu_single_env->mem_io_vaddr, len,
1215 cpu_single_env->eip,
1216 cpu_single_env->eip +
1217 (intptr_t)cpu_single_env->segs[R_CS].base);
1218 }
1219 #endif
1220 p = page_find(start >> TARGET_PAGE_BITS);
1221 if (!p)
1222 return;
1223 if (p->code_bitmap) {
1224 offset = start & ~TARGET_PAGE_MASK;
1225 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1226 if (b & ((1 << len) - 1))
1227 goto do_invalidate;
1228 } else {
1229 do_invalidate:
1230 tb_invalidate_phys_page_range(start, start + len, 1);
1231 }
1232 }
1233
1234 #if !defined(CONFIG_SOFTMMU)
1235 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1236 uintptr_t pc, void *puc)
1237 {
1238 TranslationBlock *tb;
1239 PageDesc *p;
1240 int n;
1241 #ifdef TARGET_HAS_PRECISE_SMC
1242 TranslationBlock *current_tb = NULL;
1243 CPUArchState *env = cpu_single_env;
1244 int current_tb_modified = 0;
1245 target_ulong current_pc = 0;
1246 target_ulong current_cs_base = 0;
1247 int current_flags = 0;
1248 #endif
1249
1250 addr &= TARGET_PAGE_MASK;
1251 p = page_find(addr >> TARGET_PAGE_BITS);
1252 if (!p)
1253 return;
1254 tb = p->first_tb;
1255 #ifdef TARGET_HAS_PRECISE_SMC
1256 if (tb && pc != 0) {
1257 current_tb = tb_find_pc(pc);
1258 }
1259 #endif
1260 while (tb != NULL) {
1261 n = (uintptr_t)tb & 3;
1262 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1263 #ifdef TARGET_HAS_PRECISE_SMC
1264 if (current_tb == tb &&
1265 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1266 /* If we are modifying the current TB, we must stop
1267 its execution. We could be more precise by checking
1268 that the modification is after the current PC, but it
1269 would require a specialized function to partially
1270 restore the CPU state */
1271
1272 current_tb_modified = 1;
1273 cpu_restore_state(current_tb, env, pc);
1274 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1275 &current_flags);
1276 }
1277 #endif /* TARGET_HAS_PRECISE_SMC */
1278 tb_phys_invalidate(tb, addr);
1279 tb = tb->page_next[n];
1280 }
1281 p->first_tb = NULL;
1282 #ifdef TARGET_HAS_PRECISE_SMC
1283 if (current_tb_modified) {
1284 /* we generate a block containing just the instruction
1285 modifying the memory. It will ensure that it cannot modify
1286 itself */
1287 env->current_tb = NULL;
1288 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1289 cpu_resume_from_signal(env, puc);
1290 }
1291 #endif
1292 }
1293 #endif
1294
1295 /* add the tb to the target page and protect it if necessary */
1296 static inline void tb_alloc_page(TranslationBlock *tb,
1297 unsigned int n, tb_page_addr_t page_addr)
1298 {
1299 PageDesc *p;
1300 #ifndef CONFIG_USER_ONLY
1301 bool page_already_protected;
1302 #endif
1303
1304 tb->page_addr[n] = page_addr;
1305 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1306 tb->page_next[n] = p->first_tb;
1307 #ifndef CONFIG_USER_ONLY
1308 page_already_protected = p->first_tb != NULL;
1309 #endif
1310 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1311 invalidate_page_bitmap(p);
1312
1313 #if defined(TARGET_HAS_SMC) || 1
1314
1315 #if defined(CONFIG_USER_ONLY)
1316 if (p->flags & PAGE_WRITE) {
1317 target_ulong addr;
1318 PageDesc *p2;
1319 int prot;
1320
1321 /* force the host page as non writable (writes will have a
1322 page fault + mprotect overhead) */
1323 page_addr &= qemu_host_page_mask;
1324 prot = 0;
1325 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1326 addr += TARGET_PAGE_SIZE) {
1327
1328 p2 = page_find (addr >> TARGET_PAGE_BITS);
1329 if (!p2)
1330 continue;
1331 prot |= p2->flags;
1332 p2->flags &= ~PAGE_WRITE;
1333 }
1334 mprotect(g2h(page_addr), qemu_host_page_size,
1335 (prot & PAGE_BITS) & ~PAGE_WRITE);
1336 #ifdef DEBUG_TB_INVALIDATE
1337 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1338 page_addr);
1339 #endif
1340 }
1341 #else
1342 /* if some code is already present, then the pages are already
1343 protected. So we handle the case where only the first TB is
1344 allocated in a physical page */
1345 if (!page_already_protected) {
1346 tlb_protect_code(page_addr);
1347 }
1348 #endif
1349
1350 #endif /* TARGET_HAS_SMC */
1351 }
1352
1353 /* add a new TB and link it to the physical page tables. phys_page2 is
1354 (-1) to indicate that only one page contains the TB. */
1355 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1356 tb_page_addr_t phys_page2)
1357 {
1358 unsigned int h;
1359 TranslationBlock **ptb;
1360
1361 /* Grab the mmap lock to stop another thread invalidating this TB
1362 before we are done. */
1363 mmap_lock();
1364 /* add in the physical hash table */
1365 h = tb_phys_hash_func(phys_pc);
1366 ptb = &tb_phys_hash[h];
1367 tb->phys_hash_next = *ptb;
1368 *ptb = tb;
1369
1370 /* add in the page list */
1371 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1372 if (phys_page2 != -1)
1373 tb_alloc_page(tb, 1, phys_page2);
1374 else
1375 tb->page_addr[1] = -1;
1376
1377 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1378 tb->jmp_next[0] = NULL;
1379 tb->jmp_next[1] = NULL;
1380
1381 /* init original jump addresses */
1382 if (tb->tb_next_offset[0] != 0xffff)
1383 tb_reset_jump(tb, 0);
1384 if (tb->tb_next_offset[1] != 0xffff)
1385 tb_reset_jump(tb, 1);
1386
1387 #ifdef DEBUG_TB_CHECK
1388 tb_page_check();
1389 #endif
1390 mmap_unlock();
1391 }
1392
1393 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1394 tb[1].tc_ptr. Return NULL if not found */
1395 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1396 {
1397 int m_min, m_max, m;
1398 uintptr_t v;
1399 TranslationBlock *tb;
1400
1401 if (nb_tbs <= 0)
1402 return NULL;
1403 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1404 tc_ptr >= (uintptr_t)code_gen_ptr) {
1405 return NULL;
1406 }
1407 /* binary search (cf Knuth) */
1408 m_min = 0;
1409 m_max = nb_tbs - 1;
1410 while (m_min <= m_max) {
1411 m = (m_min + m_max) >> 1;
1412 tb = &tbs[m];
1413 v = (uintptr_t)tb->tc_ptr;
1414 if (v == tc_ptr)
1415 return tb;
1416 else if (tc_ptr < v) {
1417 m_max = m - 1;
1418 } else {
1419 m_min = m + 1;
1420 }
1421 }
1422 return &tbs[m_max];
1423 }
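/*
 * Usage sketch (added annotation, not part of the original file): given a
 * host return address taken inside generated code (for example
 * env->mem_io_pc), the owning TB can be found and the guest CPU state rolled
 * back to the faulting instruction:
 *
 *     TranslationBlock *tb = tb_find_pc(retaddr);
 *     if (tb) {
 *         cpu_restore_state(tb, env, retaddr);
 *     }
 */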
1424
1425 static void tb_reset_jump_recursive(TranslationBlock *tb);
1426
1427 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1428 {
1429 TranslationBlock *tb1, *tb_next, **ptb;
1430 unsigned int n1;
1431
1432 tb1 = tb->jmp_next[n];
1433 if (tb1 != NULL) {
1434 /* find head of list */
1435 for(;;) {
1436 n1 = (uintptr_t)tb1 & 3;
1437 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1438 if (n1 == 2)
1439 break;
1440 tb1 = tb1->jmp_next[n1];
1441 }
1442 /* we are now sure that tb jumps to tb1 */
1443 tb_next = tb1;
1444
1445 /* remove tb from the jmp_first list */
1446 ptb = &tb_next->jmp_first;
1447 for(;;) {
1448 tb1 = *ptb;
1449 n1 = (uintptr_t)tb1 & 3;
1450 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1451 if (n1 == n && tb1 == tb)
1452 break;
1453 ptb = &tb1->jmp_next[n1];
1454 }
1455 *ptb = tb->jmp_next[n];
1456 tb->jmp_next[n] = NULL;
1457
1458 /* suppress the jump to next tb in generated code */
1459 tb_reset_jump(tb, n);
1460
1461 /* recursively reset jumps in the TB we could have jumped to */
1462 tb_reset_jump_recursive(tb_next);
1463 }
1464 }
1465
1466 static void tb_reset_jump_recursive(TranslationBlock *tb)
1467 {
1468 tb_reset_jump_recursive2(tb, 0);
1469 tb_reset_jump_recursive2(tb, 1);
1470 }
1471
1472 #if defined(TARGET_HAS_ICE)
1473 #if defined(CONFIG_USER_ONLY)
1474 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1475 {
1476 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1477 }
1478 #else
1479 void tb_invalidate_phys_addr(hwaddr addr)
1480 {
1481 ram_addr_t ram_addr;
1482 MemoryRegionSection *section;
1483
1484 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1485 if (!(memory_region_is_ram(section->mr)
1486 || (section->mr->rom_device && section->mr->readable))) {
1487 return;
1488 }
1489 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1490 + memory_region_section_addr(section, addr);
1491 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1492 }
1493
1494 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1495 {
1496 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1497 (pc & ~TARGET_PAGE_MASK));
1498 }
1499 #endif
1500 #endif /* TARGET_HAS_ICE */
1501
1502 #if defined(CONFIG_USER_ONLY)
1503 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1504
1505 {
1506 }
1507
1508 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1509 int flags, CPUWatchpoint **watchpoint)
1510 {
1511 return -ENOSYS;
1512 }
1513 #else
1514 /* Add a watchpoint. */
1515 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1516 int flags, CPUWatchpoint **watchpoint)
1517 {
1518 target_ulong len_mask = ~(len - 1);
1519 CPUWatchpoint *wp;
1520
1521 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1522 if ((len & (len - 1)) || (addr & ~len_mask) ||
1523 len == 0 || len > TARGET_PAGE_SIZE) {
1524 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1525 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1526 return -EINVAL;
1527 }
1528 wp = g_malloc(sizeof(*wp));
1529
1530 wp->vaddr = addr;
1531 wp->len_mask = len_mask;
1532 wp->flags = flags;
1533
1534 /* keep all GDB-injected watchpoints in front */
1535 if (flags & BP_GDB)
1536 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1537 else
1538 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1539
1540 tlb_flush_page(env, addr);
1541
1542 if (watchpoint)
1543 *watchpoint = wp;
1544 return 0;
1545 }
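/*
 * Usage sketch (added annotation, not part of the original file): the length
 * must be a power of two no larger than TARGET_PAGE_SIZE and the address
 * must be aligned to it. A 4-byte GDB write watchpoint could be installed
 * roughly as follows (BP_MEM_WRITE is assumed to be the write-access flag):
 *
 *     CPUWatchpoint *wp;
 *     int ret = cpu_watchpoint_insert(env, addr, 4,
 *                                     BP_GDB | BP_MEM_WRITE, &wp);
 *     assert(ret == 0);
 *
 * Bad length or alignment fails with -EINVAL before anything is inserted.
 */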
1546
1547 /* Remove a specific watchpoint. */
1548 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1549 int flags)
1550 {
1551 target_ulong len_mask = ~(len - 1);
1552 CPUWatchpoint *wp;
1553
1554 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1555 if (addr == wp->vaddr && len_mask == wp->len_mask
1556 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1557 cpu_watchpoint_remove_by_ref(env, wp);
1558 return 0;
1559 }
1560 }
1561 return -ENOENT;
1562 }
1563
1564 /* Remove a specific watchpoint by reference. */
1565 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1566 {
1567 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1568
1569 tlb_flush_page(env, watchpoint->vaddr);
1570
1571 g_free(watchpoint);
1572 }
1573
1574 /* Remove all matching watchpoints. */
1575 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1576 {
1577 CPUWatchpoint *wp, *next;
1578
1579 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1580 if (wp->flags & mask)
1581 cpu_watchpoint_remove_by_ref(env, wp);
1582 }
1583 }
1584 #endif
1585
1586 /* Add a breakpoint. */
1587 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1588 CPUBreakpoint **breakpoint)
1589 {
1590 #if defined(TARGET_HAS_ICE)
1591 CPUBreakpoint *bp;
1592
1593 bp = g_malloc(sizeof(*bp));
1594
1595 bp->pc = pc;
1596 bp->flags = flags;
1597
1598 /* keep all GDB-injected breakpoints in front */
1599 if (flags & BP_GDB)
1600 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1601 else
1602 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1603
1604 breakpoint_invalidate(env, pc);
1605
1606 if (breakpoint)
1607 *breakpoint = bp;
1608 return 0;
1609 #else
1610 return -ENOSYS;
1611 #endif
1612 }
1613
1614 /* Remove a specific breakpoint. */
1615 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1616 {
1617 #if defined(TARGET_HAS_ICE)
1618 CPUBreakpoint *bp;
1619
1620 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1621 if (bp->pc == pc && bp->flags == flags) {
1622 cpu_breakpoint_remove_by_ref(env, bp);
1623 return 0;
1624 }
1625 }
1626 return -ENOENT;
1627 #else
1628 return -ENOSYS;
1629 #endif
1630 }
1631
1632 /* Remove a specific breakpoint by reference. */
1633 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1634 {
1635 #if defined(TARGET_HAS_ICE)
1636 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1637
1638 breakpoint_invalidate(env, breakpoint->pc);
1639
1640 g_free(breakpoint);
1641 #endif
1642 }
1643
1644 /* Remove all matching breakpoints. */
1645 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1646 {
1647 #if defined(TARGET_HAS_ICE)
1648 CPUBreakpoint *bp, *next;
1649
1650 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1651 if (bp->flags & mask)
1652 cpu_breakpoint_remove_by_ref(env, bp);
1653 }
1654 #endif
1655 }
1656
1657 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1658 CPU loop after each instruction */
1659 void cpu_single_step(CPUArchState *env, int enabled)
1660 {
1661 #if defined(TARGET_HAS_ICE)
1662 if (env->singlestep_enabled != enabled) {
1663 env->singlestep_enabled = enabled;
1664 if (kvm_enabled())
1665 kvm_update_guest_debug(env, 0);
1666 else {
1667 /* must flush all the translated code to avoid inconsistencies */
1668 /* XXX: only flush what is necessary */
1669 tb_flush(env);
1670 }
1671 }
1672 #endif
1673 }
1674
1675 static void cpu_unlink_tb(CPUArchState *env)
1676 {
1677 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1678 problem and hope the cpu will stop of its own accord. For userspace
1679 emulation this often isn't actually as bad as it sounds. Often
1680 signals are used primarily to interrupt blocking syscalls. */
1681 TranslationBlock *tb;
1682 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1683
1684 spin_lock(&interrupt_lock);
1685 tb = env->current_tb;
1686 /* if the cpu is currently executing code, we must unlink it and
1687 all the potentially executing TB */
1688 if (tb) {
1689 env->current_tb = NULL;
1690 tb_reset_jump_recursive(tb);
1691 }
1692 spin_unlock(&interrupt_lock);
1693 }
1694
1695 #ifndef CONFIG_USER_ONLY
1696 /* mask must never be zero, except for A20 change call */
1697 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1698 {
1699 int old_mask;
1700
1701 old_mask = env->interrupt_request;
1702 env->interrupt_request |= mask;
1703
1704 /*
1705 * If called from iothread context, wake the target cpu in
1706 * case it's halted.
1707 */
1708 if (!qemu_cpu_is_self(env)) {
1709 qemu_cpu_kick(env);
1710 return;
1711 }
1712
1713 if (use_icount) {
1714 env->icount_decr.u16.high = 0xffff;
1715 if (!can_do_io(env)
1716 && (mask & ~old_mask) != 0) {
1717 cpu_abort(env, "Raised interrupt while not in I/O function");
1718 }
1719 } else {
1720 cpu_unlink_tb(env);
1721 }
1722 }
1723
1724 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1725
1726 #else /* CONFIG_USER_ONLY */
1727
1728 void cpu_interrupt(CPUArchState *env, int mask)
1729 {
1730 env->interrupt_request |= mask;
1731 cpu_unlink_tb(env);
1732 }
1733 #endif /* CONFIG_USER_ONLY */
1734
1735 void cpu_reset_interrupt(CPUArchState *env, int mask)
1736 {
1737 env->interrupt_request &= ~mask;
1738 }
1739
1740 void cpu_exit(CPUArchState *env)
1741 {
1742 env->exit_request = 1;
1743 cpu_unlink_tb(env);
1744 }
1745
1746 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1747 {
1748 va_list ap;
1749 va_list ap2;
1750
1751 va_start(ap, fmt);
1752 va_copy(ap2, ap);
1753 fprintf(stderr, "qemu: fatal: ");
1754 vfprintf(stderr, fmt, ap);
1755 fprintf(stderr, "\n");
1756 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1757 if (qemu_log_enabled()) {
1758 qemu_log("qemu: fatal: ");
1759 qemu_log_vprintf(fmt, ap2);
1760 qemu_log("\n");
1761 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1762 qemu_log_flush();
1763 qemu_log_close();
1764 }
1765 va_end(ap2);
1766 va_end(ap);
1767 #if defined(CONFIG_USER_ONLY)
1768 {
1769 struct sigaction act;
1770 sigfillset(&act.sa_mask);
1771 act.sa_handler = SIG_DFL;
1772 sigaction(SIGABRT, &act, NULL);
1773 }
1774 #endif
1775 abort();
1776 }
1777
1778 CPUArchState *cpu_copy(CPUArchState *env)
1779 {
1780 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1781 CPUArchState *next_cpu = new_env->next_cpu;
1782 int cpu_index = new_env->cpu_index;
1783 #if defined(TARGET_HAS_ICE)
1784 CPUBreakpoint *bp;
1785 CPUWatchpoint *wp;
1786 #endif
1787
1788 memcpy(new_env, env, sizeof(CPUArchState));
1789
1790 /* Preserve chaining and index. */
1791 new_env->next_cpu = next_cpu;
1792 new_env->cpu_index = cpu_index;
1793
1794 /* Clone all break/watchpoints.
1795 Note: Once we support ptrace with hw-debug register access, make sure
1796 BP_CPU break/watchpoints are handled correctly on clone. */
1797 QTAILQ_INIT(&env->breakpoints);
1798 QTAILQ_INIT(&env->watchpoints);
1799 #if defined(TARGET_HAS_ICE)
1800 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1801 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1802 }
1803 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1804 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1805 wp->flags, NULL);
1806 }
1807 #endif
1808
1809 return new_env;
1810 }
1811
1812 #if !defined(CONFIG_USER_ONLY)
1813 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1814 {
1815 unsigned int i;
1816
1817 /* Discard jump cache entries for any tb which might potentially
1818 overlap the flushed page. */
1819 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1820 memset (&env->tb_jmp_cache[i], 0,
1821 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1822
1823 i = tb_jmp_cache_hash_page(addr);
1824 memset (&env->tb_jmp_cache[i], 0,
1825 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1826 }
1827
1828 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1829 uintptr_t length)
1830 {
1831 uintptr_t start1;
1832
1833 /* we modify the TLB cache so that the dirty bit will be set again
1834 when accessing the range */
1835 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1836 /* Check that we don't span multiple blocks - this breaks the
1837 address comparisons below. */
1838 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1839 != (end - 1) - start) {
1840 abort();
1841 }
1842 cpu_tlb_reset_dirty_all(start1, length);
1843
1844 }
1845
1846 /* Note: start and end must be within the same ram block. */
1847 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1848 int dirty_flags)
1849 {
1850 uintptr_t length;
1851
1852 start &= TARGET_PAGE_MASK;
1853 end = TARGET_PAGE_ALIGN(end);
1854
1855 length = end - start;
1856 if (length == 0)
1857 return;
1858 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1859
1860 if (tcg_enabled()) {
1861 tlb_reset_dirty_range_all(start, end, length);
1862 }
1863 }
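/*
 * Usage sketch (added annotation, not part of the original file; the dirty
 * flag constant is assumed from this QEMU version's cpu-all.h): after
 * consuming a dirty range, a client clears its bits page-aligned and, under
 * TCG, the TLB is re-armed so the next write marks the page dirty again:
 *
 *     cpu_physical_memory_reset_dirty(start, start + TARGET_PAGE_SIZE,
 *                                     VGA_DIRTY_FLAG);
 *
 * As noted above, start and end must lie within the same RAM block.
 */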
1864
1865 static int cpu_physical_memory_set_dirty_tracking(int enable)
1866 {
1867 int ret = 0;
1868 in_migration = enable;
1869 return ret;
1870 }
1871
1872 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1873 MemoryRegionSection *section,
1874 target_ulong vaddr,
1875 hwaddr paddr,
1876 int prot,
1877 target_ulong *address)
1878 {
1879 hwaddr iotlb;
1880 CPUWatchpoint *wp;
1881
1882 if (memory_region_is_ram(section->mr)) {
1883 /* Normal RAM. */
1884 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1885 + memory_region_section_addr(section, paddr);
1886 if (!section->readonly) {
1887 iotlb |= phys_section_notdirty;
1888 } else {
1889 iotlb |= phys_section_rom;
1890 }
1891 } else {
1892 /* IO handlers are currently passed a physical address.
1893 It would be nice to pass an offset from the base address
1894 of that region. This would avoid having to special case RAM,
1895 and avoid full address decoding in every device.
1896 We can't use the high bits of pd for this because
1897 IO_MEM_ROMD uses these as a ram address. */
1898 iotlb = section - phys_sections;
1899 iotlb += memory_region_section_addr(section, paddr);
1900 }
1901
1902 /* Make accesses to pages with watchpoints go via the
1903 watchpoint trap routines. */
1904 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1905 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1906 /* Avoid trapping reads of pages with a write breakpoint. */
1907 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1908 iotlb = phys_section_watch + paddr;
1909 *address |= TLB_MMIO;
1910 break;
1911 }
1912 }
1913 }
1914
1915 return iotlb;
1916 }
1917
1918 #else
1919 /*
1920 * Walks guest process memory "regions" one by one
1921 * and calls callback function 'fn' for each region.
1922 */
1923
1924 struct walk_memory_regions_data
1925 {
1926 walk_memory_regions_fn fn;
1927 void *priv;
1928 uintptr_t start;
1929 int prot;
1930 };
1931
1932 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1933 abi_ulong end, int new_prot)
1934 {
1935 if (data->start != -1ul) {
1936 int rc = data->fn(data->priv, data->start, end, data->prot);
1937 if (rc != 0) {
1938 return rc;
1939 }
1940 }
1941
1942 data->start = (new_prot ? end : -1ul);
1943 data->prot = new_prot;
1944
1945 return 0;
1946 }
1947
1948 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1949 abi_ulong base, int level, void **lp)
1950 {
1951 abi_ulong pa;
1952 int i, rc;
1953
1954 if (*lp == NULL) {
1955 return walk_memory_regions_end(data, base, 0);
1956 }
1957
1958 if (level == 0) {
1959 PageDesc *pd = *lp;
1960 for (i = 0; i < L2_SIZE; ++i) {
1961 int prot = pd[i].flags;
1962
1963 pa = base | (i << TARGET_PAGE_BITS);
1964 if (prot != data->prot) {
1965 rc = walk_memory_regions_end(data, pa, prot);
1966 if (rc != 0) {
1967 return rc;
1968 }
1969 }
1970 }
1971 } else {
1972 void **pp = *lp;
1973 for (i = 0; i < L2_SIZE; ++i) {
1974 pa = base | ((abi_ulong)i <<
1975 (TARGET_PAGE_BITS + L2_BITS * level));
1976 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1977 if (rc != 0) {
1978 return rc;
1979 }
1980 }
1981 }
1982
1983 return 0;
1984 }
1985
1986 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1987 {
1988 struct walk_memory_regions_data data;
1989 uintptr_t i;
1990
1991 data.fn = fn;
1992 data.priv = priv;
1993 data.start = -1ul;
1994 data.prot = 0;
1995
1996 for (i = 0; i < V_L1_SIZE; i++) {
1997 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1998 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
1999 if (rc != 0) {
2000 return rc;
2001 }
2002 }
2003
2004 return walk_memory_regions_end(&data, 0, 0);
2005 }
2006
2007 static int dump_region(void *priv, abi_ulong start,
2008 abi_ulong end, unsigned long prot)
2009 {
2010 FILE *f = (FILE *)priv;
2011
2012 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2013 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2014 start, end, end - start,
2015 ((prot & PAGE_READ) ? 'r' : '-'),
2016 ((prot & PAGE_WRITE) ? 'w' : '-'),
2017 ((prot & PAGE_EXEC) ? 'x' : '-'));
2018
2019 return (0);
2020 }
2021
2022 /* dump memory mappings */
2023 void page_dump(FILE *f)
2024 {
2025 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2026 "start", "end", "size", "prot");
2027 walk_memory_regions(f, dump_region);
2028 }
2029
2030 int page_get_flags(target_ulong address)
2031 {
2032 PageDesc *p;
2033
2034 p = page_find(address >> TARGET_PAGE_BITS);
2035 if (!p)
2036 return 0;
2037 return p->flags;
2038 }
2039
2040 /* Modify the flags of a page and invalidate the code if necessary.
2041 The flag PAGE_WRITE_ORG is positioned automatically depending
2042 on PAGE_WRITE. The mmap_lock should already be held. */
2043 void page_set_flags(target_ulong start, target_ulong end, int flags)
2044 {
2045 target_ulong addr, len;
2046
2047 /* This function should never be called with addresses outside the
2048 guest address space. If this assert fires, it probably indicates
2049 a missing call to h2g_valid. */
2050 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2051 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2052 #endif
2053 assert(start < end);
2054
2055 start = start & TARGET_PAGE_MASK;
2056 end = TARGET_PAGE_ALIGN(end);
2057
2058 if (flags & PAGE_WRITE) {
2059 flags |= PAGE_WRITE_ORG;
2060 }
2061
2062 for (addr = start, len = end - start;
2063 len != 0;
2064 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2065 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2066
2067 /* If the write protection bit is set, then we invalidate
2068 the code inside. */
2069 if (!(p->flags & PAGE_WRITE) &&
2070 (flags & PAGE_WRITE) &&
2071 p->first_tb) {
2072 tb_invalidate_phys_page(addr, 0, NULL);
2073 }
2074 p->flags = flags;
2075 }
2076 }
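/*
 * Illustrative usage sketch (compiled out): how a hypothetical target mmap
 * emulation path might publish a new guest mapping and later validate a
 * write to it.  The range and flag combination are assumptions for
 * illustration only; as noted above, mmap_lock must already be held when
 * page_set_flags() is called.
 */
#if 0
static void example_publish_guest_mapping(target_ulong start, target_ulong len)
{
    /* mark the pages as existing (PAGE_VALID) and read/write */
    page_set_flags(start, start + len,
                   PAGE_VALID | PAGE_READ | PAGE_WRITE);

    /* a later guest store into the range can then be checked like this */
    if (page_check_range(start, len, PAGE_WRITE) < 0) {
        /* deliver the target's SIGSEGV here */
    }
}
#endif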
2077
2078 int page_check_range(target_ulong start, target_ulong len, int flags)
2079 {
2080 PageDesc *p;
2081 target_ulong end;
2082 target_ulong addr;
2083
2084 /* This function should never be called with addresses outside the
2085 guest address space. If this assert fires, it probably indicates
2086 a missing call to h2g_valid. */
2087 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2088 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2089 #endif
2090
2091 if (len == 0) {
2092 return 0;
2093 }
2094 if (start + len - 1 < start) {
2095 /* We've wrapped around. */
2096 return -1;
2097 }
2098
2099 end = TARGET_PAGE_ALIGN(start+len); /* must do before we lose bits in the next step */
2100 start = start & TARGET_PAGE_MASK;
2101
2102 for (addr = start, len = end - start;
2103 len != 0;
2104 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2105 p = page_find(addr >> TARGET_PAGE_BITS);
2106 if (!p)
2107 return -1;
2108 if (!(p->flags & PAGE_VALID))
2109 return -1;
2110
2111 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2112 return -1;
2113 if (flags & PAGE_WRITE) {
2114 if (!(p->flags & PAGE_WRITE_ORG))
2115 return -1;
2116 /* unprotect the page if it was put read-only because it
2117 contains translated code */
2118 if (!(p->flags & PAGE_WRITE)) {
2119 if (!page_unprotect(addr, 0, NULL))
2120 return -1;
2121 }
2122 return 0;
2123 }
2124 }
2125 return 0;
2126 }
2127
2128 /* called from signal handler: invalidate the code and unprotect the
2129 page. Return TRUE if the fault was successfully handled. */
2130 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2131 {
2132 unsigned int prot;
2133 PageDesc *p;
2134 target_ulong host_start, host_end, addr;
2135
2136 /* Technically this isn't safe inside a signal handler. However we
2137 know this only ever happens in a synchronous SEGV handler, so in
2138 practice it seems to be ok. */
2139 mmap_lock();
2140
2141 p = page_find(address >> TARGET_PAGE_BITS);
2142 if (!p) {
2143 mmap_unlock();
2144 return 0;
2145 }
2146
2147 /* if the page was really writable, then we change its
2148 protection back to writable */
2149 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2150 host_start = address & qemu_host_page_mask;
2151 host_end = host_start + qemu_host_page_size;
2152
2153 prot = 0;
2154 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2155 p = page_find(addr >> TARGET_PAGE_BITS);
2156 p->flags |= PAGE_WRITE;
2157 prot |= p->flags;
2158
2159 /* and since the content will be modified, we must invalidate
2160 the corresponding translated code. */
2161 tb_invalidate_phys_page(addr, pc, puc);
2162 #ifdef DEBUG_TB_CHECK
2163 tb_invalidate_check(addr);
2164 #endif
2165 }
2166 mprotect((void *)g2h(host_start), qemu_host_page_size,
2167 prot & PAGE_BITS);
2168
2169 mmap_unlock();
2170 return 1;
2171 }
2172 mmap_unlock();
2173 return 0;
2174 }
2175 #endif /* defined(CONFIG_USER_ONLY) */
2176
2177 #if !defined(CONFIG_USER_ONLY)
2178
2179 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2180 typedef struct subpage_t {
2181 MemoryRegion iomem;
2182 hwaddr base;
2183 uint16_t sub_section[TARGET_PAGE_SIZE];
2184 } subpage_t;
2185
2186 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2187 uint16_t section);
2188 static subpage_t *subpage_init(hwaddr base);
2189 static void destroy_page_desc(uint16_t section_index)
2190 {
2191 MemoryRegionSection *section = &phys_sections[section_index];
2192 MemoryRegion *mr = section->mr;
2193
2194 if (mr->subpage) {
2195 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2196 memory_region_destroy(&subpage->iomem);
2197 g_free(subpage);
2198 }
2199 }
2200
2201 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2202 {
2203 unsigned i;
2204 PhysPageEntry *p;
2205
2206 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2207 return;
2208 }
2209
2210 p = phys_map_nodes[lp->ptr];
2211 for (i = 0; i < L2_SIZE; ++i) {
2212 if (!p[i].is_leaf) {
2213 destroy_l2_mapping(&p[i], level - 1);
2214 } else {
2215 destroy_page_desc(p[i].ptr);
2216 }
2217 }
2218 lp->is_leaf = 0;
2219 lp->ptr = PHYS_MAP_NODE_NIL;
2220 }
2221
2222 static void destroy_all_mappings(AddressSpaceDispatch *d)
2223 {
2224 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2225 phys_map_nodes_reset();
2226 }
2227
2228 static uint16_t phys_section_add(MemoryRegionSection *section)
2229 {
2230 if (phys_sections_nb == phys_sections_nb_alloc) {
2231 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2232 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2233 phys_sections_nb_alloc);
2234 }
2235 phys_sections[phys_sections_nb] = *section;
2236 return phys_sections_nb++;
2237 }
2238
2239 static void phys_sections_clear(void)
2240 {
2241 phys_sections_nb = 0;
2242 }
2243
2244 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2245 {
2246 subpage_t *subpage;
2247 hwaddr base = section->offset_within_address_space
2248 & TARGET_PAGE_MASK;
2249 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2250 MemoryRegionSection subsection = {
2251 .offset_within_address_space = base,
2252 .size = TARGET_PAGE_SIZE,
2253 };
2254 hwaddr start, end;
2255
2256 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2257
2258 if (!(existing->mr->subpage)) {
2259 subpage = subpage_init(base);
2260 subsection.mr = &subpage->iomem;
2261 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2262 phys_section_add(&subsection));
2263 } else {
2264 subpage = container_of(existing->mr, subpage_t, iomem);
2265 }
2266 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2267 end = start + section->size - 1;
2268 subpage_register(subpage, start, end, phys_section_add(section));
2269 }
2270
2271
2272 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2273 {
2274 hwaddr start_addr = section->offset_within_address_space;
2275 ram_addr_t size = section->size;
2276 hwaddr addr;
2277 uint16_t section_index = phys_section_add(section);
2278
2279 assert(size);
2280
2281 addr = start_addr;
2282 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2283 section_index);
2284 }
2285
2286 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2287 {
2288 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2289 MemoryRegionSection now = *section, remain = *section;
2290
2291 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2292 || (now.size < TARGET_PAGE_SIZE)) {
2293 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2294 - now.offset_within_address_space,
2295 now.size);
2296 register_subpage(d, &now);
2297 remain.size -= now.size;
2298 remain.offset_within_address_space += now.size;
2299 remain.offset_within_region += now.size;
2300 }
2301 while (remain.size >= TARGET_PAGE_SIZE) {
2302 now = remain;
2303 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2304 now.size = TARGET_PAGE_SIZE;
2305 register_subpage(d, &now);
2306 } else {
2307 now.size &= TARGET_PAGE_MASK;
2308 register_multipage(d, &now);
2309 }
2310 remain.size -= now.size;
2311 remain.offset_within_address_space += now.size;
2312 remain.offset_within_region += now.size;
2313 }
2314 now = remain;
2315 if (now.size) {
2316 register_subpage(d, &now);
2317 }
2318 }
2319
2320 void qemu_flush_coalesced_mmio_buffer(void)
2321 {
2322 if (kvm_enabled())
2323 kvm_flush_coalesced_mmio_buffer();
2324 }
2325
2326 #if defined(__linux__) && !defined(TARGET_S390X)
2327
2328 #include <sys/vfs.h>
2329
2330 #define HUGETLBFS_MAGIC 0x958458f6
2331
2332 static long gethugepagesize(const char *path)
2333 {
2334 struct statfs fs;
2335 int ret;
2336
2337 do {
2338 ret = statfs(path, &fs);
2339 } while (ret != 0 && errno == EINTR);
2340
2341 if (ret != 0) {
2342 perror(path);
2343 return 0;
2344 }
2345
2346 if (fs.f_type != HUGETLBFS_MAGIC)
2347 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2348
2349 return fs.f_bsize;
2350 }
2351
2352 static void *file_ram_alloc(RAMBlock *block,
2353 ram_addr_t memory,
2354 const char *path)
2355 {
2356 char *filename;
2357 void *area;
2358 int fd;
2359 #ifdef MAP_POPULATE
2360 int flags;
2361 #endif
2362 unsigned long hpagesize;
2363
2364 hpagesize = gethugepagesize(path);
2365 if (!hpagesize) {
2366 return NULL;
2367 }
2368
2369 if (memory < hpagesize) {
2370 return NULL;
2371 }
2372
2373 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2374 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2375 return NULL;
2376 }
2377
2378 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2379 return NULL;
2380 }
2381
2382 fd = mkstemp(filename);
2383 if (fd < 0) {
2384 perror("unable to create backing store for hugepages");
2385 free(filename);
2386 return NULL;
2387 }
2388 unlink(filename);
2389 free(filename);
2390
2391 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2392
2393 /*
2394 * ftruncate is not supported by hugetlbfs in older
2395 * hosts, so don't bother bailing out on errors.
2396 * If anything goes wrong with it under other filesystems,
2397 * mmap will fail.
2398 */
2399 if (ftruncate(fd, memory))
2400 perror("ftruncate");
2401
2402 #ifdef MAP_POPULATE
2403 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2404 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2405 * to sidestep this quirk.
2406 */
2407 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2408 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2409 #else
2410 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2411 #endif
2412 if (area == MAP_FAILED) {
2413 perror("file_ram_alloc: can't mmap RAM pages");
2414 close(fd);
2415 return (NULL);
2416 }
2417 block->fd = fd;
2418 return area;
2419 }
2420 #endif
2421
2422 static ram_addr_t find_ram_offset(ram_addr_t size)
2423 {
2424 RAMBlock *block, *next_block;
2425 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2426
2427 if (QLIST_EMPTY(&ram_list.blocks))
2428 return 0;
2429
2430 QLIST_FOREACH(block, &ram_list.blocks, next) {
2431 ram_addr_t end, next = RAM_ADDR_MAX;
2432
2433 end = block->offset + block->length;
2434
2435 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2436 if (next_block->offset >= end) {
2437 next = MIN(next, next_block->offset);
2438 }
2439 }
2440 if (next - end >= size && next - end < mingap) {
2441 offset = end;
2442 mingap = next - end;
2443 }
2444 }
2445
2446 if (offset == RAM_ADDR_MAX) {
2447 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2448 (uint64_t)size);
2449 abort();
2450 }
2451
2452 return offset;
2453 }
2454
2455 ram_addr_t last_ram_offset(void)
2456 {
2457 RAMBlock *block;
2458 ram_addr_t last = 0;
2459
2460 QLIST_FOREACH(block, &ram_list.blocks, next)
2461 last = MAX(last, block->offset + block->length);
2462
2463 return last;
2464 }
2465
2466 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2467 {
2468 int ret;
2469 QemuOpts *machine_opts;
2470
2471 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
2472 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2473 if (machine_opts &&
2474 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2475 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2476 if (ret) {
2477 perror("qemu_madvise");
2478 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2479 "but dump_guest_core=off specified\n");
2480 }
2481 }
2482 }
2483
2484 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2485 {
2486 RAMBlock *new_block, *block;
2487
2488 new_block = NULL;
2489 QLIST_FOREACH(block, &ram_list.blocks, next) {
2490 if (block->offset == addr) {
2491 new_block = block;
2492 break;
2493 }
2494 }
2495 assert(new_block);
2496 assert(!new_block->idstr[0]);
2497
2498 if (dev) {
2499 char *id = qdev_get_dev_path(dev);
2500 if (id) {
2501 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2502 g_free(id);
2503 }
2504 }
2505 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2506
2507 QLIST_FOREACH(block, &ram_list.blocks, next) {
2508 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2509 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2510 new_block->idstr);
2511 abort();
2512 }
2513 }
2514 }
2515
2516 static int memory_try_enable_merging(void *addr, size_t len)
2517 {
2518 QemuOpts *opts;
2519
2520 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2521 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2522 /* disabled by the user */
2523 return 0;
2524 }
2525
2526 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2527 }
2528
2529 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2530 MemoryRegion *mr)
2531 {
2532 RAMBlock *new_block;
2533
2534 size = TARGET_PAGE_ALIGN(size);
2535 new_block = g_malloc0(sizeof(*new_block));
2536
2537 new_block->mr = mr;
2538 new_block->offset = find_ram_offset(size);
2539 if (host) {
2540 new_block->host = host;
2541 new_block->flags |= RAM_PREALLOC_MASK;
2542 } else {
2543 if (mem_path) {
2544 #if defined (__linux__) && !defined(TARGET_S390X)
2545 new_block->host = file_ram_alloc(new_block, size, mem_path);
2546 if (!new_block->host) {
2547 new_block->host = qemu_vmalloc(size);
2548 memory_try_enable_merging(new_block->host, size);
2549 }
2550 #else
2551 fprintf(stderr, "-mem-path option unsupported\n");
2552 exit(1);
2553 #endif
2554 } else {
2555 if (xen_enabled()) {
2556 xen_ram_alloc(new_block->offset, size, mr);
2557 } else if (kvm_enabled()) {
2558 /* some s390/kvm configurations have special constraints */
2559 new_block->host = kvm_vmalloc(size);
2560 } else {
2561 new_block->host = qemu_vmalloc(size);
2562 }
2563 memory_try_enable_merging(new_block->host, size);
2564 }
2565 }
2566 new_block->length = size;
2567
2568 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2569
2570 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2571 last_ram_offset() >> TARGET_PAGE_BITS);
2572 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2573 0, size >> TARGET_PAGE_BITS);
2574 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2575
2576 qemu_ram_setup_dump(new_block->host, size);
2577 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2578
2579 if (kvm_enabled())
2580 kvm_setup_guest_memory(new_block->host, size);
2581
2582 return new_block->offset;
2583 }
2584
2585 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2586 {
2587 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2588 }
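/*
 * Minimal sketch (compiled out) of a direct qemu_ram_alloc() call.  Boards
 * normally reach this allocator through the memory API (e.g.
 * memory_region_init_ram()); the MemoryRegion argument is assumed to have
 * been set up by such a caller and is shown here only to make the expected
 * arguments concrete.
 */
#if 0
static ram_addr_t example_alloc_guest_ram(MemoryRegion *mr, ram_addr_t size)
{
    /* returns the offset of the new block within the global ram_list */
    ram_addr_t offset = qemu_ram_alloc(size, mr);

    /* the host mapping of the block is then reachable via its offset */
    memset(qemu_get_ram_ptr(offset), 0, size);
    return offset;
}
#endif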
2589
2590 void qemu_ram_free_from_ptr(ram_addr_t addr)
2591 {
2592 RAMBlock *block;
2593
2594 QLIST_FOREACH(block, &ram_list.blocks, next) {
2595 if (addr == block->offset) {
2596 QLIST_REMOVE(block, next);
2597 g_free(block);
2598 return;
2599 }
2600 }
2601 }
2602
2603 void qemu_ram_free(ram_addr_t addr)
2604 {
2605 RAMBlock *block;
2606
2607 QLIST_FOREACH(block, &ram_list.blocks, next) {
2608 if (addr == block->offset) {
2609 QLIST_REMOVE(block, next);
2610 if (block->flags & RAM_PREALLOC_MASK) {
2611 ;
2612 } else if (mem_path) {
2613 #if defined (__linux__) && !defined(TARGET_S390X)
2614 if (block->fd) {
2615 munmap(block->host, block->length);
2616 close(block->fd);
2617 } else {
2618 qemu_vfree(block->host);
2619 }
2620 #else
2621 abort();
2622 #endif
2623 } else {
2624 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2625 munmap(block->host, block->length);
2626 #else
2627 if (xen_enabled()) {
2628 xen_invalidate_map_cache_entry(block->host);
2629 } else {
2630 qemu_vfree(block->host);
2631 }
2632 #endif
2633 }
2634 g_free(block);
2635 return;
2636 }
2637 }
2638
2639 }
2640
2641 #ifndef _WIN32
2642 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2643 {
2644 RAMBlock *block;
2645 ram_addr_t offset;
2646 int flags;
2647 void *area, *vaddr;
2648
2649 QLIST_FOREACH(block, &ram_list.blocks, next) {
2650 offset = addr - block->offset;
2651 if (offset < block->length) {
2652 vaddr = block->host + offset;
2653 if (block->flags & RAM_PREALLOC_MASK) {
2654 ;
2655 } else {
2656 flags = MAP_FIXED;
2657 munmap(vaddr, length);
2658 if (mem_path) {
2659 #if defined(__linux__) && !defined(TARGET_S390X)
2660 if (block->fd) {
2661 #ifdef MAP_POPULATE
2662 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2663 MAP_PRIVATE;
2664 #else
2665 flags |= MAP_PRIVATE;
2666 #endif
2667 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2668 flags, block->fd, offset);
2669 } else {
2670 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2671 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2672 flags, -1, 0);
2673 }
2674 #else
2675 abort();
2676 #endif
2677 } else {
2678 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2679 flags |= MAP_SHARED | MAP_ANONYMOUS;
2680 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2681 flags, -1, 0);
2682 #else
2683 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2684 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2685 flags, -1, 0);
2686 #endif
2687 }
2688 if (area != vaddr) {
2689 fprintf(stderr, "Could not remap addr: "
2690 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2691 length, addr);
2692 exit(1);
2693 }
2694 memory_try_enable_merging(vaddr, length);
2695 qemu_ram_setup_dump(vaddr, length);
2696 }
2697 return;
2698 }
2699 }
2700 }
2701 #endif /* !_WIN32 */
2702
2703 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2704 With the exception of the softmmu code in this file, this should
2705 only be used for local memory (e.g. video ram) that the device owns,
2706 and knows it isn't going to access beyond the end of the block.
2707
2708 It should not be used for general purpose DMA.
2709 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2710 */
2711 void *qemu_get_ram_ptr(ram_addr_t addr)
2712 {
2713 RAMBlock *block;
2714
2715 QLIST_FOREACH(block, &ram_list.blocks, next) {
2716 if (addr - block->offset < block->length) {
2717 /* Move this entry to the start of the list. */
2718 if (block != QLIST_FIRST(&ram_list.blocks)) {
2719 QLIST_REMOVE(block, next);
2720 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2721 }
2722 if (xen_enabled()) {
2723 /* We need to check if the requested address is in the RAM
2724 * because we don't want to map the entire memory in QEMU.
2725 * In that case just map until the end of the page.
2726 */
2727 if (block->offset == 0) {
2728 return xen_map_cache(addr, 0, 0);
2729 } else if (block->host == NULL) {
2730 block->host =
2731 xen_map_cache(block->offset, block->length, 1);
2732 }
2733 }
2734 return block->host + (addr - block->offset);
2735 }
2736 }
2737
2738 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2739 abort();
2740
2741 return NULL;
2742 }
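/*
 * Compiled-out sketch of the distinction drawn in the comment above: a
 * hypothetical device touches its own VRAM block through qemu_get_ram_ptr(),
 * but a guest-programmed DMA target goes through cpu_physical_memory_rw().
 * The ExampleDevice fields are assumptions for illustration only.
 */
#if 0
typedef struct ExampleDevice {
    ram_addr_t vram_offset;   /* offset returned when the VRAM block was allocated */
    hwaddr dma_addr;          /* guest-programmed DMA address */
} ExampleDevice;

static void example_device_touch_memory(ExampleDevice *s, uint8_t *buf, int len)
{
    /* local video RAM owned by the device: a direct host pointer is fine */
    memcpy(qemu_get_ram_ptr(s->vram_offset), buf, len);

    /* guest-directed DMA target: let the softmmu dispatch handle it */
    cpu_physical_memory_rw(s->dma_addr, buf, len, 1);
}
#endif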
2743
2744 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2745 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2746 */
2747 static void *qemu_safe_ram_ptr(ram_addr_t addr)
2748 {
2749 RAMBlock *block;
2750
2751 QLIST_FOREACH(block, &ram_list.blocks, next) {
2752 if (addr - block->offset < block->length) {
2753 if (xen_enabled()) {
2754 /* We need to check if the requested address is in the RAM
2755 * because we don't want to map the entire memory in QEMU.
2756 * In that case just map until the end of the page.
2757 */
2758 if (block->offset == 0) {
2759 return xen_map_cache(addr, 0, 0);
2760 } else if (block->host == NULL) {
2761 block->host =
2762 xen_map_cache(block->offset, block->length, 1);
2763 }
2764 }
2765 return block->host + (addr - block->offset);
2766 }
2767 }
2768
2769 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2770 abort();
2771
2772 return NULL;
2773 }
2774
2775 /* Return a host pointer to guest RAM. Similar to qemu_get_ram_ptr
2776 * but takes a size argument */
2777 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2778 {
2779 if (*size == 0) {
2780 return NULL;
2781 }
2782 if (xen_enabled()) {
2783 return xen_map_cache(addr, *size, 1);
2784 } else {
2785 RAMBlock *block;
2786
2787 QLIST_FOREACH(block, &ram_list.blocks, next) {
2788 if (addr - block->offset < block->length) {
2789 if (addr - block->offset + *size > block->length)
2790 *size = block->length - addr + block->offset;
2791 return block->host + (addr - block->offset);
2792 }
2793 }
2794
2795 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2796 abort();
2797 }
2798 }
2799
2800 void qemu_put_ram_ptr(void *addr)
2801 {
2802 trace_qemu_put_ram_ptr(addr);
2803 }
2804
2805 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2806 {
2807 RAMBlock *block;
2808 uint8_t *host = ptr;
2809
2810 if (xen_enabled()) {
2811 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2812 return 0;
2813 }
2814
2815 QLIST_FOREACH(block, &ram_list.blocks, next) {
2816 /* This case happens when the block is not mapped. */
2817 if (block->host == NULL) {
2818 continue;
2819 }
2820 if (host - block->host < block->length) {
2821 *ram_addr = block->offset + (host - block->host);
2822 return 0;
2823 }
2824 }
2825
2826 return -1;
2827 }
2828
2829 /* Some of the softmmu routines need to translate from a host pointer
2830 (typically a TLB entry) back to a ram offset. */
2831 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2832 {
2833 ram_addr_t ram_addr;
2834
2835 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2836 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2837 abort();
2838 }
2839 return ram_addr;
2840 }
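/*
 * Compiled-out sketch of the reverse translation: given a host pointer
 * previously obtained from qemu_get_ram_ptr(), recover its ram_addr_t while
 * tolerating pointers that do not belong to guest RAM at all.
 */
#if 0
static bool example_host_ptr_to_ram_addr(void *host, ram_addr_t *out)
{
    if (qemu_ram_addr_from_host(host, out) == 0) {
        return true;          /* *out now holds block->offset + delta */
    }
    return false;             /* not a pointer into guest RAM */
}
#endif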
2841
2842 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2843 unsigned size)
2844 {
2845 #ifdef DEBUG_UNASSIGNED
2846 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2847 #endif
2848 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2849 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2850 #endif
2851 return 0;
2852 }
2853
2854 static void unassigned_mem_write(void *opaque, hwaddr addr,
2855 uint64_t val, unsigned size)
2856 {
2857 #ifdef DEBUG_UNASSIGNED
2858 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2859 #endif
2860 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2861 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2862 #endif
2863 }
2864
2865 static const MemoryRegionOps unassigned_mem_ops = {
2866 .read = unassigned_mem_read,
2867 .write = unassigned_mem_write,
2868 .endianness = DEVICE_NATIVE_ENDIAN,
2869 };
2870
2871 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2872 unsigned size)
2873 {
2874 abort();
2875 }
2876
2877 static void error_mem_write(void *opaque, hwaddr addr,
2878 uint64_t value, unsigned size)
2879 {
2880 abort();
2881 }
2882
2883 static const MemoryRegionOps error_mem_ops = {
2884 .read = error_mem_read,
2885 .write = error_mem_write,
2886 .endianness = DEVICE_NATIVE_ENDIAN,
2887 };
2888
2889 static const MemoryRegionOps rom_mem_ops = {
2890 .read = error_mem_read,
2891 .write = unassigned_mem_write,
2892 .endianness = DEVICE_NATIVE_ENDIAN,
2893 };
2894
2895 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2896 uint64_t val, unsigned size)
2897 {
2898 int dirty_flags;
2899 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2900 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2901 #if !defined(CONFIG_USER_ONLY)
2902 tb_invalidate_phys_page_fast(ram_addr, size);
2903 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2904 #endif
2905 }
2906 switch (size) {
2907 case 1:
2908 stb_p(qemu_get_ram_ptr(ram_addr), val);
2909 break;
2910 case 2:
2911 stw_p(qemu_get_ram_ptr(ram_addr), val);
2912 break;
2913 case 4:
2914 stl_p(qemu_get_ram_ptr(ram_addr), val);
2915 break;
2916 default:
2917 abort();
2918 }
2919 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2920 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2921 /* we remove the notdirty callback only if the code has been
2922 flushed */
2923 if (dirty_flags == 0xff)
2924 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2925 }
2926
2927 static const MemoryRegionOps notdirty_mem_ops = {
2928 .read = error_mem_read,
2929 .write = notdirty_mem_write,
2930 .endianness = DEVICE_NATIVE_ENDIAN,
2931 };
2932
2933 /* Generate a debug exception if a watchpoint has been hit. */
2934 static void check_watchpoint(int offset, int len_mask, int flags)
2935 {
2936 CPUArchState *env = cpu_single_env;
2937 target_ulong pc, cs_base;
2938 TranslationBlock *tb;
2939 target_ulong vaddr;
2940 CPUWatchpoint *wp;
2941 int cpu_flags;
2942
2943 if (env->watchpoint_hit) {
2944 /* We re-entered the check after replacing the TB. Now raise
2945 * the debug interrupt so that it will trigger after the
2946 * current instruction. */
2947 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2948 return;
2949 }
2950 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2951 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2952 if ((vaddr == (wp->vaddr & len_mask) ||
2953 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2954 wp->flags |= BP_WATCHPOINT_HIT;
2955 if (!env->watchpoint_hit) {
2956 env->watchpoint_hit = wp;
2957 tb = tb_find_pc(env->mem_io_pc);
2958 if (!tb) {
2959 cpu_abort(env, "check_watchpoint: could not find TB for "
2960 "pc=%p", (void *)env->mem_io_pc);
2961 }
2962 cpu_restore_state(tb, env, env->mem_io_pc);
2963 tb_phys_invalidate(tb, -1);
2964 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2965 env->exception_index = EXCP_DEBUG;
2966 cpu_loop_exit(env);
2967 } else {
2968 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2969 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2970 cpu_resume_from_signal(env, NULL);
2971 }
2972 }
2973 } else {
2974 wp->flags &= ~BP_WATCHPOINT_HIT;
2975 }
2976 }
2977 }
2978
2979 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2980 so these check for a hit then pass through to the normal out-of-line
2981 phys routines. */
2982 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
2983 unsigned size)
2984 {
2985 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2986 switch (size) {
2987 case 1: return ldub_phys(addr);
2988 case 2: return lduw_phys(addr);
2989 case 4: return ldl_phys(addr);
2990 default: abort();
2991 }
2992 }
2993
2994 static void watch_mem_write(void *opaque, hwaddr addr,
2995 uint64_t val, unsigned size)
2996 {
2997 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2998 switch (size) {
2999 case 1:
3000 stb_phys(addr, val);
3001 break;
3002 case 2:
3003 stw_phys(addr, val);
3004 break;
3005 case 4:
3006 stl_phys(addr, val);
3007 break;
3008 default: abort();
3009 }
3010 }
3011
3012 static const MemoryRegionOps watch_mem_ops = {
3013 .read = watch_mem_read,
3014 .write = watch_mem_write,
3015 .endianness = DEVICE_NATIVE_ENDIAN,
3016 };
3017
3018 static uint64_t subpage_read(void *opaque, hwaddr addr,
3019 unsigned len)
3020 {
3021 subpage_t *mmio = opaque;
3022 unsigned int idx = SUBPAGE_IDX(addr);
3023 MemoryRegionSection *section;
3024 #if defined(DEBUG_SUBPAGE)
3025 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3026 mmio, len, addr, idx);
3027 #endif
3028
3029 section = &phys_sections[mmio->sub_section[idx]];
3030 addr += mmio->base;
3031 addr -= section->offset_within_address_space;
3032 addr += section->offset_within_region;
3033 return io_mem_read(section->mr, addr, len);
3034 }
3035
3036 static void subpage_write(void *opaque, hwaddr addr,
3037 uint64_t value, unsigned len)
3038 {
3039 subpage_t *mmio = opaque;
3040 unsigned int idx = SUBPAGE_IDX(addr);
3041 MemoryRegionSection *section;
3042 #if defined(DEBUG_SUBPAGE)
3043 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3044 " idx %d value %"PRIx64"\n",
3045 __func__, mmio, len, addr, idx, value);
3046 #endif
3047
3048 section = &phys_sections[mmio->sub_section[idx]];
3049 addr += mmio->base;
3050 addr -= section->offset_within_address_space;
3051 addr += section->offset_within_region;
3052 io_mem_write(section->mr, addr, value, len);
3053 }
3054
3055 static const MemoryRegionOps subpage_ops = {
3056 .read = subpage_read,
3057 .write = subpage_write,
3058 .endianness = DEVICE_NATIVE_ENDIAN,
3059 };
3060
3061 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3062 unsigned size)
3063 {
3064 ram_addr_t raddr = addr;
3065 void *ptr = qemu_get_ram_ptr(raddr);
3066 switch (size) {
3067 case 1: return ldub_p(ptr);
3068 case 2: return lduw_p(ptr);
3069 case 4: return ldl_p(ptr);
3070 default: abort();
3071 }
3072 }
3073
3074 static void subpage_ram_write(void *opaque, hwaddr addr,
3075 uint64_t value, unsigned size)
3076 {
3077 ram_addr_t raddr = addr;
3078 void *ptr = qemu_get_ram_ptr(raddr);
3079 switch (size) {
3080 case 1: return stb_p(ptr, value);
3081 case 2: return stw_p(ptr, value);
3082 case 4: return stl_p(ptr, value);
3083 default: abort();
3084 }
3085 }
3086
3087 static const MemoryRegionOps subpage_ram_ops = {
3088 .read = subpage_ram_read,
3089 .write = subpage_ram_write,
3090 .endianness = DEVICE_NATIVE_ENDIAN,
3091 };
3092
3093 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3094 uint16_t section)
3095 {
3096 int idx, eidx;
3097
3098 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3099 return -1;
3100 idx = SUBPAGE_IDX(start);
3101 eidx = SUBPAGE_IDX(end);
3102 #if defined(DEBUG_SUBPAGE)
3103 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3104 mmio, start, end, idx, eidx, memory);
3105 #endif
3106 if (memory_region_is_ram(phys_sections[section].mr)) {
3107 MemoryRegionSection new_section = phys_sections[section];
3108 new_section.mr = &io_mem_subpage_ram;
3109 section = phys_section_add(&new_section);
3110 }
3111 for (; idx <= eidx; idx++) {
3112 mmio->sub_section[idx] = section;
3113 }
3114
3115 return 0;
3116 }
3117
3118 static subpage_t *subpage_init(hwaddr base)
3119 {
3120 subpage_t *mmio;
3121
3122 mmio = g_malloc0(sizeof(subpage_t));
3123
3124 mmio->base = base;
3125 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3126 "subpage", TARGET_PAGE_SIZE);
3127 mmio->iomem.subpage = true;
3128 #if defined(DEBUG_SUBPAGE)
3129 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3130 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3131 #endif
3132 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3133
3134 return mmio;
3135 }
3136
3137 static uint16_t dummy_section(MemoryRegion *mr)
3138 {
3139 MemoryRegionSection section = {
3140 .mr = mr,
3141 .offset_within_address_space = 0,
3142 .offset_within_region = 0,
3143 .size = UINT64_MAX,
3144 };
3145
3146 return phys_section_add(&section);
3147 }
3148
3149 MemoryRegion *iotlb_to_region(hwaddr index)
3150 {
3151 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3152 }
3153
3154 static void io_mem_init(void)
3155 {
3156 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3157 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3158 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3159 "unassigned", UINT64_MAX);
3160 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3161 "notdirty", UINT64_MAX);
3162 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3163 "subpage-ram", UINT64_MAX);
3164 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3165 "watch", UINT64_MAX);
3166 }
3167
3168 static void mem_begin(MemoryListener *listener)
3169 {
3170 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3171
3172 destroy_all_mappings(d);
3173 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3174 }
3175
3176 static void core_begin(MemoryListener *listener)
3177 {
3178 phys_sections_clear();
3179 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3180 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3181 phys_section_rom = dummy_section(&io_mem_rom);
3182 phys_section_watch = dummy_section(&io_mem_watch);
3183 }
3184
3185 static void tcg_commit(MemoryListener *listener)
3186 {
3187 CPUArchState *env;
3188
3189 /* since each CPU stores ram addresses in its TLB cache, we must
3190 reset the modified entries */
3191 /* XXX: slow ! */
3192 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3193 tlb_flush(env, 1);
3194 }
3195 }
3196
3197 static void core_log_global_start(MemoryListener *listener)
3198 {
3199 cpu_physical_memory_set_dirty_tracking(1);
3200 }
3201
3202 static void core_log_global_stop(MemoryListener *listener)
3203 {
3204 cpu_physical_memory_set_dirty_tracking(0);
3205 }
3206
3207 static void io_region_add(MemoryListener *listener,
3208 MemoryRegionSection *section)
3209 {
3210 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3211
3212 mrio->mr = section->mr;
3213 mrio->offset = section->offset_within_region;
3214 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3215 section->offset_within_address_space, section->size);
3216 ioport_register(&mrio->iorange);
3217 }
3218
3219 static void io_region_del(MemoryListener *listener,
3220 MemoryRegionSection *section)
3221 {
3222 isa_unassign_ioport(section->offset_within_address_space, section->size);
3223 }
3224
3225 static MemoryListener core_memory_listener = {
3226 .begin = core_begin,
3227 .log_global_start = core_log_global_start,
3228 .log_global_stop = core_log_global_stop,
3229 .priority = 1,
3230 };
3231
3232 static MemoryListener io_memory_listener = {
3233 .region_add = io_region_add,
3234 .region_del = io_region_del,
3235 .priority = 0,
3236 };
3237
3238 static MemoryListener tcg_memory_listener = {
3239 .commit = tcg_commit,
3240 };
3241
3242 void address_space_init_dispatch(AddressSpace *as)
3243 {
3244 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3245
3246 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3247 d->listener = (MemoryListener) {
3248 .begin = mem_begin,
3249 .region_add = mem_add,
3250 .region_nop = mem_add,
3251 .priority = 0,
3252 };
3253 as->dispatch = d;
3254 memory_listener_register(&d->listener, as);
3255 }
3256
3257 void address_space_destroy_dispatch(AddressSpace *as)
3258 {
3259 AddressSpaceDispatch *d = as->dispatch;
3260
3261 memory_listener_unregister(&d->listener);
3262 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3263 g_free(d);
3264 as->dispatch = NULL;
3265 }
3266
3267 static void memory_map_init(void)
3268 {
3269 system_memory = g_malloc(sizeof(*system_memory));
3270 memory_region_init(system_memory, "system", INT64_MAX);
3271 address_space_init(&address_space_memory, system_memory);
3272 address_space_memory.name = "memory";
3273
3274 system_io = g_malloc(sizeof(*system_io));
3275 memory_region_init(system_io, "io", 65536);
3276 address_space_init(&address_space_io, system_io);
3277 address_space_io.name = "I/O";
3278
3279 memory_listener_register(&core_memory_listener, &address_space_memory);
3280 memory_listener_register(&io_memory_listener, &address_space_io);
3281 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3282 }
3283
3284 MemoryRegion *get_system_memory(void)
3285 {
3286 return system_memory;
3287 }
3288
3289 MemoryRegion *get_system_io(void)
3290 {
3291 return system_io;
3292 }
3293
3294 #endif /* !defined(CONFIG_USER_ONLY) */
3295
3296 /* physical memory access (slow version, mainly for debug) */
3297 #if defined(CONFIG_USER_ONLY)
3298 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3299 uint8_t *buf, int len, int is_write)
3300 {
3301 int l, flags;
3302 target_ulong page;
3303 void * p;
3304
3305 while (len > 0) {
3306 page = addr & TARGET_PAGE_MASK;
3307 l = (page + TARGET_PAGE_SIZE) - addr;
3308 if (l > len)
3309 l = len;
3310 flags = page_get_flags(page);
3311 if (!(flags & PAGE_VALID))
3312 return -1;
3313 if (is_write) {
3314 if (!(flags & PAGE_WRITE))
3315 return -1;
3316 /* XXX: this code should not depend on lock_user */
3317 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3318 return -1;
3319 memcpy(p, buf, l);
3320 unlock_user(p, addr, l);
3321 } else {
3322 if (!(flags & PAGE_READ))
3323 return -1;
3324 /* XXX: this code should not depend on lock_user */
3325 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3326 return -1;
3327 memcpy(buf, p, l);
3328 unlock_user(p, addr, 0);
3329 }
3330 len -= l;
3331 buf += l;
3332 addr += l;
3333 }
3334 return 0;
3335 }
3336
3337 #else
3338
3339 static void invalidate_and_set_dirty(hwaddr addr,
3340 hwaddr length)
3341 {
3342 if (!cpu_physical_memory_is_dirty(addr)) {
3343 /* invalidate code */
3344 tb_invalidate_phys_page_range(addr, addr + length, 0);
3345 /* set dirty bit */
3346 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3347 }
3348 xen_modified_memory(addr, length);
3349 }
3350
3351 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3352 int len, bool is_write)
3353 {
3354 AddressSpaceDispatch *d = as->dispatch;
3355 int l;
3356 uint8_t *ptr;
3357 uint32_t val;
3358 hwaddr page;
3359 MemoryRegionSection *section;
3360
3361 while (len > 0) {
3362 page = addr & TARGET_PAGE_MASK;
3363 l = (page + TARGET_PAGE_SIZE) - addr;
3364 if (l > len)
3365 l = len;
3366 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3367
3368 if (is_write) {
3369 if (!memory_region_is_ram(section->mr)) {
3370 hwaddr addr1;
3371 addr1 = memory_region_section_addr(section, addr);
3372 /* XXX: could force cpu_single_env to NULL to avoid
3373 potential bugs */
3374 if (l >= 4 && ((addr1 & 3) == 0)) {
3375 /* 32 bit write access */
3376 val = ldl_p(buf);
3377 io_mem_write(section->mr, addr1, val, 4);
3378 l = 4;
3379 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3380 /* 16 bit write access */
3381 val = lduw_p(buf);
3382 io_mem_write(section->mr, addr1, val, 2);
3383 l = 2;
3384 } else {
3385 /* 8 bit write access */
3386 val = ldub_p(buf);
3387 io_mem_write(section->mr, addr1, val, 1);
3388 l = 1;
3389 }
3390 } else if (!section->readonly) {
3391 ram_addr_t addr1;
3392 addr1 = memory_region_get_ram_addr(section->mr)
3393 + memory_region_section_addr(section, addr);
3394 /* RAM case */
3395 ptr = qemu_get_ram_ptr(addr1);
3396 memcpy(ptr, buf, l);
3397 invalidate_and_set_dirty(addr1, l);
3398 qemu_put_ram_ptr(ptr);
3399 }
3400 } else {
3401 if (!(memory_region_is_ram(section->mr) ||
3402 memory_region_is_romd(section->mr))) {
3403 hwaddr addr1;
3404 /* I/O case */
3405 addr1 = memory_region_section_addr(section, addr);
3406 if (l >= 4 && ((addr1 & 3) == 0)) {
3407 /* 32 bit read access */
3408 val = io_mem_read(section->mr, addr1, 4);
3409 stl_p(buf, val);
3410 l = 4;
3411 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3412 /* 16 bit read access */
3413 val = io_mem_read(section->mr, addr1, 2);
3414 stw_p(buf, val);
3415 l = 2;
3416 } else {
3417 /* 8 bit read access */
3418 val = io_mem_read(section->mr, addr1, 1);
3419 stb_p(buf, val);
3420 l = 1;
3421 }
3422 } else {
3423 /* RAM case */
3424 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3425 + memory_region_section_addr(section,
3426 addr));
3427 memcpy(buf, ptr, l);
3428 qemu_put_ram_ptr(ptr);
3429 }
3430 }
3431 len -= l;
3432 buf += l;
3433 addr += l;
3434 }
3435 }
3436
3437 void address_space_write(AddressSpace *as, hwaddr addr,
3438 const uint8_t *buf, int len)
3439 {
3440 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3441 }
3442
3443 /**
3444 * address_space_read: read from an address space.
3445 *
3446 * @as: #AddressSpace to be accessed
3447 * @addr: address within that address space
3448 * @buf: buffer with the data transferred (@len bytes)
3449 */
3450 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3451 {
3452 address_space_rw(as, addr, buf, len, false);
3453 }
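/*
 * Compiled-out sketch of the AddressSpace accessors above, using the global
 * address_space_memory as the target.  The address and payload are
 * illustrative assumptions; the calls dispatch to RAM or MMIO according to
 * the flat view of the address space.
 */
#if 0
static void example_address_space_access(hwaddr addr)
{
    uint8_t out[4] = { 0x12, 0x34, 0x56, 0x78 };
    uint8_t in[4];

    address_space_write(&address_space_memory, addr, out, sizeof(out));
    address_space_read(&address_space_memory, addr, in, sizeof(in));
}
#endif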
3454
3455
3456 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3457 int len, int is_write)
3458 {
3459 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
3460 }
3461
3462 /* used for ROM loading : can write in RAM and ROM */
3463 void cpu_physical_memory_write_rom(hwaddr addr,
3464 const uint8_t *buf, int len)
3465 {
3466 AddressSpaceDispatch *d = address_space_memory.dispatch;
3467 int l;
3468 uint8_t *ptr;
3469 hwaddr page;
3470 MemoryRegionSection *section;
3471
3472 while (len > 0) {
3473 page = addr & TARGET_PAGE_MASK;
3474 l = (page + TARGET_PAGE_SIZE) - addr;
3475 if (l > len)
3476 l = len;
3477 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3478
3479 if (!(memory_region_is_ram(section->mr) ||
3480 memory_region_is_romd(section->mr))) {
3481 /* do nothing */
3482 } else {
3483 unsigned long addr1;
3484 addr1 = memory_region_get_ram_addr(section->mr)
3485 + memory_region_section_addr(section, addr);
3486 /* ROM/RAM case */
3487 ptr = qemu_get_ram_ptr(addr1);
3488 memcpy(ptr, buf, l);
3489 invalidate_and_set_dirty(addr1, l);
3490 qemu_put_ram_ptr(ptr);
3491 }
3492 len -= l;
3493 buf += l;
3494 addr += l;
3495 }
3496 }
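/*
 * Compiled-out sketch of firmware loading with the helper above: unlike the
 * ordinary write path, it also patches regions the guest sees as read-only,
 * which is what ROM loaders rely on.  rom_base and the blob are assumptions
 * for illustration.
 */
#if 0
static void example_load_firmware(hwaddr rom_base, const uint8_t *blob, int size)
{
    /* writes even into read-only RAM / ROMD regions */
    cpu_physical_memory_write_rom(rom_base, blob, size);
}
#endif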
3497
3498 typedef struct {
3499 void *buffer;
3500 hwaddr addr;
3501 hwaddr len;
3502 } BounceBuffer;
3503
3504 static BounceBuffer bounce;
3505
3506 typedef struct MapClient {
3507 void *opaque;
3508 void (*callback)(void *opaque);
3509 QLIST_ENTRY(MapClient) link;
3510 } MapClient;
3511
3512 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3513 = QLIST_HEAD_INITIALIZER(map_client_list);
3514
3515 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3516 {
3517 MapClient *client = g_malloc(sizeof(*client));
3518
3519 client->opaque = opaque;
3520 client->callback = callback;
3521 QLIST_INSERT_HEAD(&map_client_list, client, link);
3522 return client;
3523 }
3524
3525 static void cpu_unregister_map_client(void *_client)
3526 {
3527 MapClient *client = (MapClient *)_client;
3528
3529 QLIST_REMOVE(client, link);
3530 g_free(client);
3531 }
3532
3533 static void cpu_notify_map_clients(void)
3534 {
3535 MapClient *client;
3536
3537 while (!QLIST_EMPTY(&map_client_list)) {
3538 client = QLIST_FIRST(&map_client_list);
3539 client->callback(client->opaque);
3540 cpu_unregister_map_client(client);
3541 }
3542 }
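/*
 * Compiled-out sketch of the retry protocol: when the map call (below) fails
 * because the bounce buffer is busy, a caller can park a callback here and
 * retry once cpu_notify_map_clients() fires on unmap.  ExampleDmaJob and the
 * surrounding flow are assumptions for illustration only.
 */
#if 0
typedef struct ExampleDmaJob {
    hwaddr addr;
    hwaddr len;
} ExampleDmaJob;

static void example_retry_map(void *opaque);

static void example_start_dma(ExampleDmaJob *job)
{
    hwaddr plen = job->len;
    void *host = cpu_physical_memory_map(job->addr, &plen, 1);

    if (!host) {
        /* resources exhausted: ask to be called back when a mapping is freed */
        cpu_register_map_client(job, example_retry_map);
        return;
    }
    /* ... fill host[0..plen), then unmap with the actual access length ... */
    cpu_physical_memory_unmap(host, plen, 1, plen);
}

static void example_retry_map(void *opaque)
{
    example_start_dma(opaque);
}
#endif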
3543
3544 /* Map a physical memory region into a host virtual address.
3545 * May map a subset of the requested range, given by and returned in *plen.
3546 * May return NULL if resources needed to perform the mapping are exhausted.
3547 * Use only for reads OR writes - not for read-modify-write operations.
3548 * Use cpu_register_map_client() to know when retrying the map operation is
3549 * likely to succeed.
3550 */
3551 void *address_space_map(AddressSpace *as,
3552 hwaddr addr,
3553 hwaddr *plen,
3554 bool is_write)
3555 {
3556 AddressSpaceDispatch *d = as->dispatch;
3557 hwaddr len = *plen;
3558 hwaddr todo = 0;
3559 int l;
3560 hwaddr page;
3561 MemoryRegionSection *section;
3562 ram_addr_t raddr = RAM_ADDR_MAX;
3563 ram_addr_t rlen;
3564 void *ret;
3565
3566 while (len > 0) {
3567 page = addr & TARGET_PAGE_MASK;
3568 l = (page + TARGET_PAGE_SIZE) - addr;
3569 if (l > len)
3570 l = len;
3571 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3572
3573 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3574 if (todo || bounce.buffer) {
3575 break;
3576 }
3577 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3578 bounce.addr = addr;
3579 bounce.len = l;
3580 if (!is_write) {
3581 address_space_read(as, addr, bounce.buffer, l);
3582 }
3583
3584 *plen = l;
3585 return bounce.buffer;
3586 }
3587 if (!todo) {
3588 raddr = memory_region_get_ram_addr(section->mr)
3589 + memory_region_section_addr(section, addr);
3590 }
3591
3592 len -= l;
3593 addr += l;
3594 todo += l;
3595 }
3596 rlen = todo;
3597 ret = qemu_ram_ptr_length(raddr, &rlen);
3598 *plen = rlen;
3599 return ret;
3600 }
3601
3602 /* Unmaps a memory region previously mapped by address_space_map().
3603 * Will also mark the memory as dirty if is_write == 1. access_len gives
3604 * the amount of memory that was actually read or written by the caller.
3605 */
3606 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3607 int is_write, hwaddr access_len)
3608 {
3609 if (buffer != bounce.buffer) {
3610 if (is_write) {
3611 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3612 while (access_len) {
3613 unsigned l;
3614 l = TARGET_PAGE_SIZE;
3615 if (l > access_len)
3616 l = access_len;
3617 invalidate_and_set_dirty(addr1, l);
3618 addr1 += l;
3619 access_len -= l;
3620 }
3621 }
3622 if (xen_enabled()) {
3623 xen_invalidate_map_cache_entry(buffer);
3624 }
3625 return;
3626 }
3627 if (is_write) {
3628 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3629 }
3630 qemu_vfree(bounce.buffer);
3631 bounce.buffer = NULL;
3632 cpu_notify_map_clients();
3633 }
3634
3635 void *cpu_physical_memory_map(hwaddr addr,
3636 hwaddr *plen,
3637 int is_write)
3638 {
3639 return address_space_map(&address_space_memory, addr, plen, is_write);
3640 }
3641
3642 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3643 int is_write, hwaddr access_len)
3644 {
3645 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3646 }
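/*
 * Compiled-out sketch of zero-copy access via the map/unmap pair above:
 * *plen may come back smaller than requested, so callers loop until the
 * whole range has been covered.  The fill pattern is illustrative.
 */
#if 0
static void example_fill_guest_buffer(hwaddr addr, hwaddr len, uint8_t pattern)
{
    while (len > 0) {
        hwaddr plen = len;
        void *host = cpu_physical_memory_map(addr, &plen, 1);

        if (!host) {
            break;        /* see cpu_register_map_client() for the retry path */
        }
        memset(host, pattern, plen);
        cpu_physical_memory_unmap(host, plen, 1, plen);

        addr += plen;
        len -= plen;
    }
}
#endif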
3647
3648 /* warning: addr must be aligned */
3649 static inline uint32_t ldl_phys_internal(hwaddr addr,
3650 enum device_endian endian)
3651 {
3652 uint8_t *ptr;
3653 uint32_t val;
3654 MemoryRegionSection *section;
3655
3656 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3657
3658 if (!(memory_region_is_ram(section->mr) ||
3659 memory_region_is_romd(section->mr))) {
3660 /* I/O case */
3661 addr = memory_region_section_addr(section, addr);
3662 val = io_mem_read(section->mr, addr, 4);
3663 #if defined(TARGET_WORDS_BIGENDIAN)
3664 if (endian == DEVICE_LITTLE_ENDIAN) {
3665 val = bswap32(val);
3666 }
3667 #else
3668 if (endian == DEVICE_BIG_ENDIAN) {
3669 val = bswap32(val);
3670 }
3671 #endif
3672 } else {
3673 /* RAM case */
3674 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3675 & TARGET_PAGE_MASK)
3676 + memory_region_section_addr(section, addr));
3677 switch (endian) {
3678 case DEVICE_LITTLE_ENDIAN:
3679 val = ldl_le_p(ptr);
3680 break;
3681 case DEVICE_BIG_ENDIAN:
3682 val = ldl_be_p(ptr);
3683 break;
3684 default:
3685 val = ldl_p(ptr);
3686 break;
3687 }
3688 }
3689 return val;
3690 }
3691
3692 uint32_t ldl_phys(hwaddr addr)
3693 {
3694 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3695 }
3696
3697 uint32_t ldl_le_phys(hwaddr addr)
3698 {
3699 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3700 }
3701
3702 uint32_t ldl_be_phys(hwaddr addr)
3703 {
3704 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3705 }
3706
3707 /* warning: addr must be aligned */
3708 static inline uint64_t ldq_phys_internal(hwaddr addr,
3709 enum device_endian endian)
3710 {
3711 uint8_t *ptr;
3712 uint64_t val;
3713 MemoryRegionSection *section;
3714
3715 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3716
3717 if (!(memory_region_is_ram(section->mr) ||
3718 memory_region_is_romd(section->mr))) {
3719 /* I/O case */
3720 addr = memory_region_section_addr(section, addr);
3721
3722 /* XXX This is broken when device endian != cpu endian.
3723 Fix and add "endian" variable check */
3724 #ifdef TARGET_WORDS_BIGENDIAN
3725 val = io_mem_read(section->mr, addr, 4) << 32;
3726 val |= io_mem_read(section->mr, addr + 4, 4);
3727 #else
3728 val = io_mem_read(section->mr, addr, 4);
3729 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3730 #endif
3731 } else {
3732 /* RAM case */
3733 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3734 & TARGET_PAGE_MASK)
3735 + memory_region_section_addr(section, addr));
3736 switch (endian) {
3737 case DEVICE_LITTLE_ENDIAN:
3738 val = ldq_le_p(ptr);
3739 break;
3740 case DEVICE_BIG_ENDIAN:
3741 val = ldq_be_p(ptr);
3742 break;
3743 default:
3744 val = ldq_p(ptr);
3745 break;
3746 }
3747 }
3748 return val;
3749 }
3750
3751 uint64_t ldq_phys(hwaddr addr)
3752 {
3753 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3754 }
3755
3756 uint64_t ldq_le_phys(hwaddr addr)
3757 {
3758 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3759 }
3760
3761 uint64_t ldq_be_phys(hwaddr addr)
3762 {
3763 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3764 }
3765
3766 /* XXX: optimize */
3767 uint32_t ldub_phys(hwaddr addr)
3768 {
3769 uint8_t val;
3770 cpu_physical_memory_read(addr, &val, 1);
3771 return val;
3772 }
3773
3774 /* warning: addr must be aligned */
3775 static inline uint32_t lduw_phys_internal(hwaddr addr,
3776 enum device_endian endian)
3777 {
3778 uint8_t *ptr;
3779 uint64_t val;
3780 MemoryRegionSection *section;
3781
3782 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3783
3784 if (!(memory_region_is_ram(section->mr) ||
3785 memory_region_is_romd(section->mr))) {
3786 /* I/O case */
3787 addr = memory_region_section_addr(section, addr);
3788 val = io_mem_read(section->mr, addr, 2);
3789 #if defined(TARGET_WORDS_BIGENDIAN)
3790 if (endian == DEVICE_LITTLE_ENDIAN) {
3791 val = bswap16(val);
3792 }
3793 #else
3794 if (endian == DEVICE_BIG_ENDIAN) {
3795 val = bswap16(val);
3796 }
3797 #endif
3798 } else {
3799 /* RAM case */
3800 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3801 & TARGET_PAGE_MASK)
3802 + memory_region_section_addr(section, addr));
3803 switch (endian) {
3804 case DEVICE_LITTLE_ENDIAN:
3805 val = lduw_le_p(ptr);
3806 break;
3807 case DEVICE_BIG_ENDIAN:
3808 val = lduw_be_p(ptr);
3809 break;
3810 default:
3811 val = lduw_p(ptr);
3812 break;
3813 }
3814 }
3815 return val;
3816 }
3817
3818 uint32_t lduw_phys(hwaddr addr)
3819 {
3820 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3821 }
3822
3823 uint32_t lduw_le_phys(hwaddr addr)
3824 {
3825 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3826 }
3827
3828 uint32_t lduw_be_phys(hwaddr addr)
3829 {
3830 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3831 }
3832
3833 /* warning: addr must be aligned. The ram page is not marked as dirty
3834 and the code inside is not invalidated. It is useful if the dirty
3835 bits are used to track modified PTEs */
3836 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3837 {
3838 uint8_t *ptr;
3839 MemoryRegionSection *section;
3840
3841 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3842
3843 if (!memory_region_is_ram(section->mr) || section->readonly) {
3844 addr = memory_region_section_addr(section, addr);
3845 if (memory_region_is_ram(section->mr)) {
3846 section = &phys_sections[phys_section_rom];
3847 }
3848 io_mem_write(section->mr, addr, val, 4);
3849 } else {
3850 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3851 & TARGET_PAGE_MASK)
3852 + memory_region_section_addr(section, addr);
3853 ptr = qemu_get_ram_ptr(addr1);
3854 stl_p(ptr, val);
3855
3856 if (unlikely(in_migration)) {
3857 if (!cpu_physical_memory_is_dirty(addr1)) {
3858 /* invalidate code */
3859 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3860 /* set dirty bit */
3861 cpu_physical_memory_set_dirty_flags(
3862 addr1, (0xff & ~CODE_DIRTY_FLAG));
3863 }
3864 }
3865 }
3866 }
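/*
 * Compiled-out sketch of the PTE-update case the comment above mentions: a
 * target MMU helper sets accessed/dirty bits in a page table entry without
 * marking the page-table page dirty or invalidating translated code.  The
 * bit positions are illustrative assumptions, not any particular
 * architecture's layout.
 */
#if 0
#define EXAMPLE_PTE_ACCESSED  (1u << 5)
#define EXAMPLE_PTE_DIRTY     (1u << 6)

static void example_update_pte(hwaddr pte_addr, bool is_write)
{
    uint32_t pte = ldl_phys(pte_addr);

    pte |= EXAMPLE_PTE_ACCESSED;
    if (is_write) {
        pte |= EXAMPLE_PTE_DIRTY;
    }
    /* avoid flagging the page table page itself as dirty guest RAM */
    stl_phys_notdirty(pte_addr, pte);
}
#endif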
3867
3868 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3869 {
3870 uint8_t *ptr;
3871 MemoryRegionSection *section;
3872
3873 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3874
3875 if (!memory_region_is_ram(section->mr) || section->readonly) {
3876 addr = memory_region_section_addr(section, addr);
3877 if (memory_region_is_ram(section->mr)) {
3878 section = &phys_sections[phys_section_rom];
3879 }
3880 #ifdef TARGET_WORDS_BIGENDIAN
3881 io_mem_write(section->mr, addr, val >> 32, 4);
3882 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3883 #else
3884 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3885 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3886 #endif
3887 } else {
3888 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3889 & TARGET_PAGE_MASK)
3890 + memory_region_section_addr(section, addr));
3891 stq_p(ptr, val);
3892 }
3893 }
3894
3895 /* warning: addr must be aligned */
3896 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3897 enum device_endian endian)
3898 {
3899 uint8_t *ptr;
3900 MemoryRegionSection *section;
3901
3902 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3903
3904 if (!memory_region_is_ram(section->mr) || section->readonly) {
3905 addr = memory_region_section_addr(section, addr);
3906 if (memory_region_is_ram(section->mr)) {
3907 section = &phys_sections[phys_section_rom];
3908 }
3909 #if defined(TARGET_WORDS_BIGENDIAN)
3910 if (endian == DEVICE_LITTLE_ENDIAN) {
3911 val = bswap32(val);
3912 }
3913 #else
3914 if (endian == DEVICE_BIG_ENDIAN) {
3915 val = bswap32(val);
3916 }
3917 #endif
3918 io_mem_write(section->mr, addr, val, 4);
3919 } else {
3920 unsigned long addr1;
3921 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3922 + memory_region_section_addr(section, addr);
3923 /* RAM case */
3924 ptr = qemu_get_ram_ptr(addr1);
3925 switch (endian) {
3926 case DEVICE_LITTLE_ENDIAN:
3927 stl_le_p(ptr, val);
3928 break;
3929 case DEVICE_BIG_ENDIAN:
3930 stl_be_p(ptr, val);
3931 break;
3932 default:
3933 stl_p(ptr, val);
3934 break;
3935 }
3936 invalidate_and_set_dirty(addr1, 4);
3937 }
3938 }
3939
3940 void stl_phys(hwaddr addr, uint32_t val)
3941 {
3942 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3943 }
3944
3945 void stl_le_phys(hwaddr addr, uint32_t val)
3946 {
3947 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3948 }
3949
3950 void stl_be_phys(hwaddr addr, uint32_t val)
3951 {
3952 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3953 }
3954
3955 /* XXX: optimize */
3956 void stb_phys(hwaddr addr, uint32_t val)
3957 {
3958 uint8_t v = val;
3959 cpu_physical_memory_write(addr, &v, 1);
3960 }
3961
3962 /* warning: addr must be aligned */
3963 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3964 enum device_endian endian)
3965 {
3966 uint8_t *ptr;
3967 MemoryRegionSection *section;
3968
3969 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3970
3971 if (!memory_region_is_ram(section->mr) || section->readonly) {
3972 addr = memory_region_section_addr(section, addr);
3973 if (memory_region_is_ram(section->mr)) {
3974 section = &phys_sections[phys_section_rom];
3975 }
3976 #if defined(TARGET_WORDS_BIGENDIAN)
3977 if (endian == DEVICE_LITTLE_ENDIAN) {
3978 val = bswap16(val);
3979 }
3980 #else
3981 if (endian == DEVICE_BIG_ENDIAN) {
3982 val = bswap16(val);
3983 }
3984 #endif
3985 io_mem_write(section->mr, addr, val, 2);
3986 } else {
3987 unsigned long addr1;
3988 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3989 + memory_region_section_addr(section, addr);
3990 /* RAM case */
3991 ptr = qemu_get_ram_ptr(addr1);
3992 switch (endian) {
3993 case DEVICE_LITTLE_ENDIAN:
3994 stw_le_p(ptr, val);
3995 break;
3996 case DEVICE_BIG_ENDIAN:
3997 stw_be_p(ptr, val);
3998 break;
3999 default:
4000 stw_p(ptr, val);
4001 break;
4002 }
4003 invalidate_and_set_dirty(addr1, 2);
4004 }
4005 }
4006
4007 void stw_phys(hwaddr addr, uint32_t val)
4008 {
4009 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4010 }
4011
4012 void stw_le_phys(hwaddr addr, uint32_t val)
4013 {
4014 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4015 }
4016
4017 void stw_be_phys(hwaddr addr, uint32_t val)
4018 {
4019 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4020 }
4021
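/* 64-bit physical stores: stq_phys() stores in the target's native byte
   order (tswap64), while stq_le_phys()/stq_be_phys() store in an explicit
   byte order. */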
4022 /* XXX: optimize */
4023 void stq_phys(hwaddr addr, uint64_t val)
4024 {
4025 val = tswap64(val);
4026 cpu_physical_memory_write(addr, &val, 8);
4027 }
4028
4029 void stq_le_phys(hwaddr addr, uint64_t val)
4030 {
4031 val = cpu_to_le64(val);
4032 cpu_physical_memory_write(addr, &val, 8);
4033 }
4034
4035 void stq_be_phys(hwaddr addr, uint64_t val)
4036 {
4037 val = cpu_to_be64(val);
4038 cpu_physical_memory_write(addr, &val, 8);
4039 }
4040
4041 /* virtual memory access for debug (includes writing to ROM) */
4042 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4043 uint8_t *buf, int len, int is_write)
4044 {
4045 int l;
4046 hwaddr phys_addr;
4047 target_ulong page;
4048
4049 while (len > 0) {
4050 page = addr & TARGET_PAGE_MASK;
4051 phys_addr = cpu_get_phys_page_debug(env, page);
4052 /* if no physical page mapped, return an error */
4053 if (phys_addr == -1)
4054 return -1;
4055 l = (page + TARGET_PAGE_SIZE) - addr;
4056 if (l > len)
4057 l = len;
4058 phys_addr += (addr & ~TARGET_PAGE_MASK);
4059 if (is_write)
4060 cpu_physical_memory_write_rom(phys_addr, buf, l);
4061 else
4062 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4063 len -= l;
4064 buf += l;
4065 addr += l;
4066 }
4067 return 0;
4068 }
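
/*
 * Illustrative sketch (editor's addition, not part of upstream exec.c):
 * how a debugger front end might use cpu_memory_rw_debug() to pull a
 * buffer out of the guest through its virtual address space.  The helper
 * name and buffer handling are hypothetical.
 */
#if 0
static int example_read_guest_buffer(CPUArchState *env, target_ulong vaddr,
                                     uint8_t *buf, int len)
{
    /* The debug path walks the guest page tables itself (via
       cpu_get_phys_page_debug), so it works even for pages the TLB has
       never touched; -1 means some page in the range is unmapped. */
    return cpu_memory_rw_debug(env, vaddr, buf, len, 0);
}
#endif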
4069 #endif
4070
4071 /* In deterministic execution mode (icount), an instruction that performs
4072 device I/O must be the last instruction of its TB. */
4073 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4074 {
4075 TranslationBlock *tb;
4076 uint32_t n, cflags;
4077 target_ulong pc, cs_base;
4078 uint64_t flags;
4079
4080 tb = tb_find_pc(retaddr);
4081 if (!tb) {
4082 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4083 (void *)retaddr);
4084 }
4085 n = env->icount_decr.u16.low + tb->icount;
4086 cpu_restore_state(tb, env, retaddr);
4087 /* Calculate how many instructions had been executed before the I/O
4088 instruction was reached. */
4089 n = n - env->icount_decr.u16.low;
4090 /* Generate a new TB ending on the I/O insn. */
4091 n++;
4092 /* On MIPS and SH, delay slot instructions can only be restarted if
4093 they were already the first instruction in the TB. If this is not
4094 the first instruction in a TB then re-execute the preceding
4095 branch. */
4096 #if defined(TARGET_MIPS)
4097 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4098 env->active_tc.PC -= 4;
4099 env->icount_decr.u16.low++;
4100 env->hflags &= ~MIPS_HFLAG_BMASK;
4101 }
4102 #elif defined(TARGET_SH4)
4103 if ((env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) != 0
4104 && n > 1) {
4105 env->pc -= 2;
4106 env->icount_decr.u16.low++;
4107 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4108 }
4109 #endif
4110 /* This should never happen: a TB's instruction count is bounded by CF_COUNT_MASK. */
4111 if (n > CF_COUNT_MASK)
4112 cpu_abort(env, "TB too big during recompile");
4113
4114 cflags = n | CF_LAST_IO;
4115 pc = tb->pc;
4116 cs_base = tb->cs_base;
4117 flags = tb->flags;
4118 tb_phys_invalidate(tb, -1);
4119 /* FIXME: In theory this could raise an exception. In practice
4120 we have already translated the block once so it's probably ok. */
4121 tb_gen_code(env, pc, cs_base, flags, cflags);
4122 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4123 the first in the TB) then we end up generating a whole new TB and
4124 repeating the fault, which is horribly inefficient.
4125 Better would be to execute just this insn uncached, or generate a
4126 second new TB. */
4127 cpu_resume_from_signal(env, NULL);
4128 }
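
/*
 * Illustrative sketch (editor's addition, not part of upstream exec.c):
 * the general shape of a cpu_io_recompile() call site.  This is a sketch
 * of the pattern only, not the exact upstream caller; the helper name is
 * made up, and env->can_do_io is assumed to be the icount bookkeeping flag
 * that is only set while the last instruction of a TB is executing.
 */
#if 0
static void example_icount_io_access(CPUArchState *env, uintptr_t retaddr)
{
    if (!env->can_do_io) {
        /* The I/O access happened mid-TB, so it cannot be counted
           deterministically.  cpu_io_recompile() does not return: it
           retranslates the TB so it ends on the I/O instruction and
           restarts execution. */
        cpu_io_recompile(env, retaddr);
    }
    /* ... perform the actual device access here ... */
}
#endif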
4129
4130 #if !defined(CONFIG_USER_ONLY)
4131
4132 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4133 {
4134 int i, target_code_size, max_target_code_size;
4135 int direct_jmp_count, direct_jmp2_count, cross_page;
4136 TranslationBlock *tb;
4137
4138 target_code_size = 0;
4139 max_target_code_size = 0;
4140 cross_page = 0;
4141 direct_jmp_count = 0;
4142 direct_jmp2_count = 0;
4143 for (i = 0; i < nb_tbs; i++) {
4144 tb = &tbs[i];
4145 target_code_size += tb->size;
4146 if (tb->size > max_target_code_size)
4147 max_target_code_size = tb->size;
4148 if (tb->page_addr[1] != -1)
4149 cross_page++;
4150 if (tb->tb_next_offset[0] != 0xffff) {
4151 direct_jmp_count++;
4152 if (tb->tb_next_offset[1] != 0xffff) {
4153 direct_jmp2_count++;
4154 }
4155 }
4156 }
4157 /* XXX: avoid using doubles? */
4158 cpu_fprintf(f, "Translation buffer state:\n");
4159 cpu_fprintf(f, "gen code size %td/%zd\n",
4160 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4161 cpu_fprintf(f, "TB count %d/%d\n",
4162 nb_tbs, code_gen_max_blocks);
4163 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4164 nb_tbs ? target_code_size / nb_tbs : 0,
4165 max_target_code_size);
4166 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4167 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4168 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4169 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4170 cross_page,
4171 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4172 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4173 direct_jmp_count,
4174 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4175 direct_jmp2_count,
4176 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4177 cpu_fprintf(f, "\nStatistics:\n");
4178 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4179 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4180 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4181 tcg_dump_info(f, cpu_fprintf);
4182 }
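
/*
 * Illustrative sketch (editor's addition, not part of upstream exec.c):
 * dump_exec_info() only needs a FILE pointer and an fprintf-compatible
 * callback, so it can be pointed at stderr for ad-hoc debugging; in the
 * QEMU tree it is normally driven by the monitor's "info jit" command.
 * The helper name below is made up.
 */
#if 0
static void example_dump_jit_stats(void)
{
    dump_exec_info(stderr, fprintf);
}
#endif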
4183
4184 /*
4185 * A helper function for the _utterly broken_ virtio device model to find
4186 * out if it's running on a big-endian machine. Don't do this at home, kids!
4187 */
4188 bool virtio_is_big_endian(void);
4189 bool virtio_is_big_endian(void)
4190 {
4191 #if defined(TARGET_WORDS_BIGENDIAN)
4192 return true;
4193 #else
4194 return false;
4195 #endif
4196 }
4197
4198 #endif
4199
4200 #ifndef CONFIG_USER_ONLY
4201 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4202 {
4203 MemoryRegionSection *section;
4204
4205 section = phys_page_find(address_space_memory.dispatch,
4206 phys_addr >> TARGET_PAGE_BITS);
4207
4208 return !(memory_region_is_ram(section->mr) ||
4209 memory_region_is_romd(section->mr));
4210 }
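
/*
 * Illustrative sketch (editor's addition, not part of upstream exec.c):
 * a hypothetical caller that skips device-backed (neither RAM nor ROMD)
 * pages when scanning guest-physical memory, so the scan never triggers
 * MMIO side effects.
 */
#if 0
static bool example_page_is_plain_memory(hwaddr pa)
{
    return !cpu_physical_memory_is_io(pa & TARGET_PAGE_MASK);
}
#endif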
4211 #endif