Convert IO_MEM_{RAM,ROM,UNASSIGNED,NOTDIRTY} to MemoryRegions
[mirror_qemu.git] / exec.c
1/*
2 * virtual page mapping and translated block handling
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19#include "config.h"
20#ifdef _WIN32
21#include <windows.h>
22#else
23#include <sys/types.h>
24#include <sys/mman.h>
25#endif
26
27#include "qemu-common.h"
28#include "cpu.h"
29#include "tcg.h"
30#include "hw/hw.h"
31#include "hw/qdev.h"
32#include "osdep.h"
33#include "kvm.h"
34#include "hw/xen.h"
35#include "qemu-timer.h"
36#include "memory.h"
37#include "exec-memory.h"
38#if defined(CONFIG_USER_ONLY)
39#include <qemu.h>
40#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41#include <sys/param.h>
42#if __FreeBSD_version >= 700104
43#define HAVE_KINFO_GETVMMAP
44#define sigqueue sigqueue_freebsd /* avoid redefinition */
45#include <sys/time.h>
46#include <sys/proc.h>
47#include <machine/profile.h>
48#define _KERNEL
49#include <sys/user.h>
50#undef _KERNEL
51#undef sigqueue
52#include <libutil.h>
53#endif
54#endif
55#else /* !CONFIG_USER_ONLY */
56#include "xen-mapcache.h"
57#include "trace.h"
58#endif
59
60#define WANT_EXEC_OBSOLETE
61#include "exec-obsolete.h"
62
63//#define DEBUG_TB_INVALIDATE
64//#define DEBUG_FLUSH
65//#define DEBUG_TLB
66//#define DEBUG_UNASSIGNED
67
68/* make various TB consistency checks */
69//#define DEBUG_TB_CHECK
70//#define DEBUG_TLB_CHECK
71
72//#define DEBUG_IOPORT
73//#define DEBUG_SUBPAGE
74
75#if !defined(CONFIG_USER_ONLY)
76/* TB consistency checks only implemented for usermode emulation. */
77#undef DEBUG_TB_CHECK
78#endif
79
80#define SMC_BITMAP_USE_THRESHOLD 10
81
82static TranslationBlock *tbs;
83static int code_gen_max_blocks;
84TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85static int nb_tbs;
86/* any access to the tbs or the page table must use this lock */
87spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88
89#if defined(__arm__) || defined(__sparc_v9__)
90/* The prologue must be reachable with a direct jump. ARM and Sparc64
91 have limited branch ranges (possibly also PPC) so place it in a
 92 section close to the code segment. */
93#define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96#elif defined(_WIN32)
97/* Maximum alignment for Win32 is 16. */
98#define code_gen_section \
99 __attribute__((aligned (16)))
100#else
101#define code_gen_section \
102 __attribute__((aligned (32)))
103#endif
104
105uint8_t code_gen_prologue[1024] code_gen_section;
106static uint8_t *code_gen_buffer;
107static unsigned long code_gen_buffer_size;
108/* threshold to flush the translated code buffer */
109static unsigned long code_gen_buffer_max_size;
110static uint8_t *code_gen_ptr;
111
112#if !defined(CONFIG_USER_ONLY)
113int phys_ram_fd;
114static int in_migration;
115
116RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
117
118static MemoryRegion *system_memory;
119static MemoryRegion *system_io;
120
121MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
122
123#endif
124
125CPUState *first_cpu;
126/* current CPU in the current thread. It is only valid inside
127 cpu_exec() */
128DEFINE_TLS(CPUState *,cpu_single_env);
129/* 0 = Do not count executed instructions.
130 1 = Precise instruction counting.
131 2 = Adaptive rate instruction counting. */
132int use_icount = 0;
133
134typedef struct PageDesc {
135 /* list of TBs intersecting this ram page */
136 TranslationBlock *first_tb;
 137 /* in order to optimize self-modifying code, we count the number
 138 of write accesses to a given page; above a threshold a bitmap is used */
139 unsigned int code_write_count;
140 uint8_t *code_bitmap;
141#if defined(CONFIG_USER_ONLY)
142 unsigned long flags;
143#endif
144} PageDesc;
145
146/* In system mode we want L1_MAP to be based on ram offsets,
147 while in user mode we want it to be based on virtual addresses. */
148#if !defined(CONFIG_USER_ONLY)
149#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
150# define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
151#else
152# define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
153#endif
154#else
155# define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
156#endif
157
158/* Size of the L2 (and L3, etc) page tables. */
159#define L2_BITS 10
160#define L2_SIZE (1 << L2_BITS)
161
162/* The bits remaining after N lower levels of page tables. */
163#define P_L1_BITS_REM \
164 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
165#define V_L1_BITS_REM \
166 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
167
168/* Size of the L1 page table. Avoid silly small sizes. */
169#if P_L1_BITS_REM < 4
170#define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
171#else
172#define P_L1_BITS P_L1_BITS_REM
173#endif
174
175#if V_L1_BITS_REM < 4
176#define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
177#else
178#define V_L1_BITS V_L1_BITS_REM
179#endif
180
181#define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
182#define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
183
184#define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
185#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
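/* Worked example with illustrative values (not tied to any particular
   target): for TARGET_PHYS_ADDR_SPACE_BITS == 36, TARGET_PAGE_BITS == 12
   and L2_BITS == 10, P_L1_BITS_REM == (36 - 12) % 10 == 4, so
   P_L1_BITS == 4, P_L1_SIZE == 16 and P_L1_SHIFT == 36 - 12 - 4 == 20:
   a page index is split into a 4-bit L1 index followed by two 10-bit
   L2-level indexes. */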
186
187unsigned long qemu_real_host_page_size;
188unsigned long qemu_host_page_size;
189unsigned long qemu_host_page_mask;
190
191/* This is a multi-level map on the virtual address space.
192 The bottom level has pointers to PageDesc. */
193static void *l1_map[V_L1_SIZE];
194
195#if !defined(CONFIG_USER_ONLY)
196typedef struct PhysPageDesc {
197 /* offset in host memory of the page + io_index in the low bits */
198 ram_addr_t phys_offset;
199 ram_addr_t region_offset;
200} PhysPageDesc;
201
202/* This is a multi-level map on the physical address space.
203 The bottom level has pointers to PhysPageDesc. */
204static void *l1_phys_map[P_L1_SIZE];
205
206static void io_mem_init(void);
207static void memory_map_init(void);
208
209/* io memory support */
210CPUWriteMemoryFunc *_io_mem_write[IO_MEM_NB_ENTRIES][4];
211CPUReadMemoryFunc *_io_mem_read[IO_MEM_NB_ENTRIES][4];
212void *io_mem_opaque[IO_MEM_NB_ENTRIES];
213static char io_mem_used[IO_MEM_NB_ENTRIES];
214static int io_mem_watch;
215#endif
216
217/* log support */
218#ifdef WIN32
219static const char *logfilename = "qemu.log";
220#else
221static const char *logfilename = "/tmp/qemu.log";
222#endif
223FILE *logfile;
224int loglevel;
225static int log_append = 0;
226
227/* statistics */
228#if !defined(CONFIG_USER_ONLY)
229static int tlb_flush_count;
230#endif
231static int tb_flush_count;
232static int tb_phys_invalidate_count;
233
234#ifdef _WIN32
235static void map_exec(void *addr, long size)
236{
237 DWORD old_protect;
238 VirtualProtect(addr, size,
239 PAGE_EXECUTE_READWRITE, &old_protect);
240
241}
242#else
243static void map_exec(void *addr, long size)
244{
245 unsigned long start, end, page_size;
246
247 page_size = getpagesize();
248 start = (unsigned long)addr;
249 start &= ~(page_size - 1);
250
251 end = (unsigned long)addr + size;
252 end += page_size - 1;
253 end &= ~(page_size - 1);
254
255 mprotect((void *)start, end - start,
256 PROT_READ | PROT_WRITE | PROT_EXEC);
257}
258#endif
259
260static void page_init(void)
261{
262 /* NOTE: we can always suppose that qemu_host_page_size >=
263 TARGET_PAGE_SIZE */
264#ifdef _WIN32
265 {
266 SYSTEM_INFO system_info;
267
268 GetSystemInfo(&system_info);
269 qemu_real_host_page_size = system_info.dwPageSize;
270 }
271#else
272 qemu_real_host_page_size = getpagesize();
273#endif
274 if (qemu_host_page_size == 0)
275 qemu_host_page_size = qemu_real_host_page_size;
276 if (qemu_host_page_size < TARGET_PAGE_SIZE)
277 qemu_host_page_size = TARGET_PAGE_SIZE;
278 qemu_host_page_mask = ~(qemu_host_page_size - 1);
279
280#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
281 {
282#ifdef HAVE_KINFO_GETVMMAP
283 struct kinfo_vmentry *freep;
284 int i, cnt;
285
286 freep = kinfo_getvmmap(getpid(), &cnt);
287 if (freep) {
288 mmap_lock();
289 for (i = 0; i < cnt; i++) {
290 unsigned long startaddr, endaddr;
291
292 startaddr = freep[i].kve_start;
293 endaddr = freep[i].kve_end;
294 if (h2g_valid(startaddr)) {
295 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
296
297 if (h2g_valid(endaddr)) {
298 endaddr = h2g(endaddr);
299 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
300 } else {
301#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
302 endaddr = ~0ul;
303 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
304#endif
305 }
306 }
307 }
308 free(freep);
309 mmap_unlock();
310 }
311#else
312 FILE *f;
313
314 last_brk = (unsigned long)sbrk(0);
315
316 f = fopen("/compat/linux/proc/self/maps", "r");
317 if (f) {
318 mmap_lock();
319
320 do {
321 unsigned long startaddr, endaddr;
322 int n;
323
324 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
325
326 if (n == 2 && h2g_valid(startaddr)) {
327 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
328
329 if (h2g_valid(endaddr)) {
330 endaddr = h2g(endaddr);
331 } else {
332 endaddr = ~0ul;
333 }
334 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
335 }
336 } while (!feof(f));
337
338 fclose(f);
339 mmap_unlock();
340 }
341#endif
342 }
343#endif
344}
345
346static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
347{
348 PageDesc *pd;
349 void **lp;
350 int i;
351
352#if defined(CONFIG_USER_ONLY)
353 /* We can't use g_malloc because it may recurse into a locked mutex. */
354# define ALLOC(P, SIZE) \
355 do { \
356 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
357 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
358 } while (0)
359#else
360# define ALLOC(P, SIZE) \
361 do { P = g_malloc0(SIZE); } while (0)
362#endif
363
364 /* Level 1. Always allocated. */
365 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
366
367 /* Level 2..N-1. */
368 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
369 void **p = *lp;
370
371 if (p == NULL) {
372 if (!alloc) {
373 return NULL;
374 }
375 ALLOC(p, sizeof(void *) * L2_SIZE);
376 *lp = p;
377 }
378
379 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
380 }
381
382 pd = *lp;
383 if (pd == NULL) {
384 if (!alloc) {
385 return NULL;
386 }
387 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
388 *lp = pd;
389 }
390
391#undef ALLOC
392
393 return pd + (index & (L2_SIZE - 1));
394}
395
396static inline PageDesc *page_find(tb_page_addr_t index)
397{
398 return page_find_alloc(index, 0);
399}
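/* Both helpers are indexed by page number rather than by address; the
   typical lookup used throughout this file is

       PageDesc *p = page_find(addr >> TARGET_PAGE_BITS);

   which returns NULL when no PageDesc has been allocated for that page. */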
400
401#if !defined(CONFIG_USER_ONLY)
402static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
403{
404 PhysPageDesc *pd;
405 void **lp;
406 int i;
407
408 /* Level 1. Always allocated. */
409 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
410
411 /* Level 2..N-1. */
412 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
413 void **p = *lp;
414 if (p == NULL) {
415 if (!alloc) {
416 return NULL;
417 }
418 *lp = p = g_malloc0(sizeof(void *) * L2_SIZE);
419 }
420 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
421 }
422
423 pd = *lp;
424 if (pd == NULL) {
425 int i;
426 int first_index = index & ~(L2_SIZE - 1);
427
428 if (!alloc) {
429 return NULL;
430 }
431
432 *lp = pd = g_malloc(sizeof(PhysPageDesc) * L2_SIZE);
433
434 for (i = 0; i < L2_SIZE; i++) {
435 pd[i].phys_offset = io_mem_unassigned.ram_addr;
436 pd[i].region_offset = (first_index + i) << TARGET_PAGE_BITS;
437 }
438 }
439
440 return pd + (index & (L2_SIZE - 1));
441}
442
443static inline PhysPageDesc phys_page_find(target_phys_addr_t index)
444{
445 PhysPageDesc *p = phys_page_find_alloc(index, 0);
446
447 if (p) {
448 return *p;
449 } else {
450 return (PhysPageDesc) {
451 .phys_offset = io_mem_unassigned.ram_addr,
452 .region_offset = index << TARGET_PAGE_BITS,
453 };
454 }
455}
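/* Note that phys_page_find() returns its descriptor by value: an unmapped
   page is reported with phys_offset == io_mem_unassigned.ram_addr rather
   than as a NULL pointer. */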
456
457static void tlb_protect_code(ram_addr_t ram_addr);
458static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
459 target_ulong vaddr);
460#define mmap_lock() do { } while(0)
461#define mmap_unlock() do { } while(0)
462#endif
463
464#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
465
466#if defined(CONFIG_USER_ONLY)
467/* Currently it is not recommended to allocate big chunks of data in
 468 user mode. This will change when a dedicated libc is used. */
469#define USE_STATIC_CODE_GEN_BUFFER
470#endif
471
472#ifdef USE_STATIC_CODE_GEN_BUFFER
473static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
474 __attribute__((aligned (CODE_GEN_ALIGN)));
475#endif
476
477static void code_gen_alloc(unsigned long tb_size)
478{
479#ifdef USE_STATIC_CODE_GEN_BUFFER
480 code_gen_buffer = static_code_gen_buffer;
481 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
482 map_exec(code_gen_buffer, code_gen_buffer_size);
483#else
484 code_gen_buffer_size = tb_size;
485 if (code_gen_buffer_size == 0) {
486#if defined(CONFIG_USER_ONLY)
487 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
488#else
489 /* XXX: needs adjustments */
490 code_gen_buffer_size = (unsigned long)(ram_size / 4);
491#endif
492 }
493 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
494 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
495 /* The code gen buffer location may have constraints depending on
496 the host cpu and OS */
497#if defined(__linux__)
498 {
499 int flags;
500 void *start = NULL;
501
502 flags = MAP_PRIVATE | MAP_ANONYMOUS;
503#if defined(__x86_64__)
504 flags |= MAP_32BIT;
505 /* Cannot map more than that */
506 if (code_gen_buffer_size > (800 * 1024 * 1024))
507 code_gen_buffer_size = (800 * 1024 * 1024);
508#elif defined(__sparc_v9__)
509 // Map the buffer below 2G, so we can use direct calls and branches
510 flags |= MAP_FIXED;
511 start = (void *) 0x60000000UL;
512 if (code_gen_buffer_size > (512 * 1024 * 1024))
513 code_gen_buffer_size = (512 * 1024 * 1024);
514#elif defined(__arm__)
 515 /* Keep the buffer no bigger than 16MB so we can branch between blocks */
516 if (code_gen_buffer_size > 16 * 1024 * 1024)
517 code_gen_buffer_size = 16 * 1024 * 1024;
518#elif defined(__s390x__)
519 /* Map the buffer so that we can use direct calls and branches. */
520 /* We have a +- 4GB range on the branches; leave some slop. */
521 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
522 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
523 }
524 start = (void *)0x90000000UL;
525#endif
526 code_gen_buffer = mmap(start, code_gen_buffer_size,
527 PROT_WRITE | PROT_READ | PROT_EXEC,
528 flags, -1, 0);
529 if (code_gen_buffer == MAP_FAILED) {
530 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
531 exit(1);
532 }
533 }
534#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
535 || defined(__DragonFly__) || defined(__OpenBSD__) \
536 || defined(__NetBSD__)
537 {
538 int flags;
539 void *addr = NULL;
540 flags = MAP_PRIVATE | MAP_ANONYMOUS;
541#if defined(__x86_64__)
542 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
543 * 0x40000000 is free */
544 flags |= MAP_FIXED;
545 addr = (void *)0x40000000;
546 /* Cannot map more than that */
547 if (code_gen_buffer_size > (800 * 1024 * 1024))
548 code_gen_buffer_size = (800 * 1024 * 1024);
549#elif defined(__sparc_v9__)
550 // Map the buffer below 2G, so we can use direct calls and branches
551 flags |= MAP_FIXED;
552 addr = (void *) 0x60000000UL;
553 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
554 code_gen_buffer_size = (512 * 1024 * 1024);
555 }
556#endif
557 code_gen_buffer = mmap(addr, code_gen_buffer_size,
558 PROT_WRITE | PROT_READ | PROT_EXEC,
559 flags, -1, 0);
560 if (code_gen_buffer == MAP_FAILED) {
561 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
562 exit(1);
563 }
564 }
565#else
566 code_gen_buffer = g_malloc(code_gen_buffer_size);
567 map_exec(code_gen_buffer, code_gen_buffer_size);
568#endif
569#endif /* !USE_STATIC_CODE_GEN_BUFFER */
570 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
571 code_gen_buffer_max_size = code_gen_buffer_size -
572 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
573 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
574 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
575}
576
577/* Must be called before using the QEMU cpus. 'tb_size' is the size
578 (in bytes) allocated to the translation buffer. Zero means default
579 size. */
580void tcg_exec_init(unsigned long tb_size)
581{
582 cpu_gen_init();
583 code_gen_alloc(tb_size);
584 code_gen_ptr = code_gen_buffer;
585 page_init();
586#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
587 /* There's no guest base to take into account, so go ahead and
588 initialize the prologue now. */
589 tcg_prologue_init(&tcg_ctx);
590#endif
591}
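/* Passing tb_size == 0 selects the default translation buffer size, so a
   minimal caller can simply do tcg_exec_init(0). */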
592
593bool tcg_enabled(void)
594{
595 return code_gen_buffer != NULL;
596}
597
598void cpu_exec_init_all(void)
599{
600#if !defined(CONFIG_USER_ONLY)
601 memory_map_init();
602 io_mem_init();
603#endif
604}
605
606#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
607
608static int cpu_common_post_load(void *opaque, int version_id)
609{
610 CPUState *env = opaque;
611
612 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
613 version_id is increased. */
614 env->interrupt_request &= ~0x01;
615 tlb_flush(env, 1);
616
617 return 0;
618}
619
620static const VMStateDescription vmstate_cpu_common = {
621 .name = "cpu_common",
622 .version_id = 1,
623 .minimum_version_id = 1,
624 .minimum_version_id_old = 1,
625 .post_load = cpu_common_post_load,
626 .fields = (VMStateField []) {
627 VMSTATE_UINT32(halted, CPUState),
628 VMSTATE_UINT32(interrupt_request, CPUState),
629 VMSTATE_END_OF_LIST()
630 }
631};
632#endif
633
634CPUState *qemu_get_cpu(int cpu)
635{
636 CPUState *env = first_cpu;
637
638 while (env) {
639 if (env->cpu_index == cpu)
640 break;
641 env = env->next_cpu;
642 }
643
644 return env;
645}
646
647void cpu_exec_init(CPUState *env)
648{
649 CPUState **penv;
650 int cpu_index;
651
652#if defined(CONFIG_USER_ONLY)
653 cpu_list_lock();
654#endif
655 env->next_cpu = NULL;
656 penv = &first_cpu;
657 cpu_index = 0;
658 while (*penv != NULL) {
659 penv = &(*penv)->next_cpu;
660 cpu_index++;
661 }
662 env->cpu_index = cpu_index;
663 env->numa_node = 0;
664 QTAILQ_INIT(&env->breakpoints);
665 QTAILQ_INIT(&env->watchpoints);
666#ifndef CONFIG_USER_ONLY
667 env->thread_id = qemu_get_thread_id();
668#endif
669 *penv = env;
670#if defined(CONFIG_USER_ONLY)
671 cpu_list_unlock();
672#endif
673#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
674 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
675 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
676 cpu_save, cpu_load, env);
677#endif
678}
679
680/* Allocate a new translation block. Flush the translation buffer if
681 too many translation blocks or too much generated code. */
682static TranslationBlock *tb_alloc(target_ulong pc)
683{
684 TranslationBlock *tb;
685
686 if (nb_tbs >= code_gen_max_blocks ||
687 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
688 return NULL;
689 tb = &tbs[nb_tbs++];
690 tb->pc = pc;
691 tb->cflags = 0;
692 return tb;
693}
694
695void tb_free(TranslationBlock *tb)
696{
 697 /* In practice this is mostly used for single-use temporary TBs.
698 Ignore the hard cases and just back up if this TB happens to
699 be the last one generated. */
700 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
701 code_gen_ptr = tb->tc_ptr;
702 nb_tbs--;
703 }
704}
705
706static inline void invalidate_page_bitmap(PageDesc *p)
707{
708 if (p->code_bitmap) {
709 g_free(p->code_bitmap);
710 p->code_bitmap = NULL;
711 }
712 p->code_write_count = 0;
713}
714
715/* Set to NULL all the 'first_tb' fields in all PageDescs. */
716
717static void page_flush_tb_1 (int level, void **lp)
718{
719 int i;
720
721 if (*lp == NULL) {
722 return;
723 }
724 if (level == 0) {
725 PageDesc *pd = *lp;
726 for (i = 0; i < L2_SIZE; ++i) {
727 pd[i].first_tb = NULL;
728 invalidate_page_bitmap(pd + i);
729 }
730 } else {
731 void **pp = *lp;
732 for (i = 0; i < L2_SIZE; ++i) {
733 page_flush_tb_1 (level - 1, pp + i);
734 }
735 }
736}
737
738static void page_flush_tb(void)
739{
740 int i;
741 for (i = 0; i < V_L1_SIZE; i++) {
742 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
743 }
744}
745
746/* flush all the translation blocks */
747/* XXX: tb_flush is currently not thread safe */
748void tb_flush(CPUState *env1)
749{
750 CPUState *env;
751#if defined(DEBUG_FLUSH)
752 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
753 (unsigned long)(code_gen_ptr - code_gen_buffer),
754 nb_tbs, nb_tbs > 0 ?
755 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
756#endif
757 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
758 cpu_abort(env1, "Internal error: code buffer overflow\n");
759
760 nb_tbs = 0;
761
762 for(env = first_cpu; env != NULL; env = env->next_cpu) {
763 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
764 }
765
766 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
767 page_flush_tb();
768
769 code_gen_ptr = code_gen_buffer;
770 /* XXX: flush processor icache at this point if cache flush is
771 expensive */
772 tb_flush_count++;
773}
774
775#ifdef DEBUG_TB_CHECK
776
777static void tb_invalidate_check(target_ulong address)
778{
779 TranslationBlock *tb;
780 int i;
781 address &= TARGET_PAGE_MASK;
782 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
783 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
784 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
785 address >= tb->pc + tb->size)) {
786 printf("ERROR invalidate: address=" TARGET_FMT_lx
787 " PC=%08lx size=%04x\n",
788 address, (long)tb->pc, tb->size);
789 }
790 }
791 }
792}
793
794/* verify that all the pages have correct rights for code */
795static void tb_page_check(void)
796{
797 TranslationBlock *tb;
798 int i, flags1, flags2;
799
800 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
801 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
802 flags1 = page_get_flags(tb->pc);
803 flags2 = page_get_flags(tb->pc + tb->size - 1);
804 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
805 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
806 (long)tb->pc, tb->size, flags1, flags2);
807 }
808 }
809 }
810}
811
812#endif
813
814/* invalidate one TB */
815static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
816 int next_offset)
817{
818 TranslationBlock *tb1;
819 for(;;) {
820 tb1 = *ptb;
821 if (tb1 == tb) {
822 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
823 break;
824 }
825 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
826 }
827}
828
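/* In the per-page TB lists and in the jump lists below, the low two bits of
   each stored TranslationBlock pointer are a tag: 0 or 1 selects which of
   the block's (at most two) pages or jump slots the link belongs to, and
   the value 2 marks the head of the circular jump list. */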
829static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
830{
831 TranslationBlock *tb1;
832 unsigned int n1;
833
834 for(;;) {
835 tb1 = *ptb;
836 n1 = (long)tb1 & 3;
837 tb1 = (TranslationBlock *)((long)tb1 & ~3);
838 if (tb1 == tb) {
839 *ptb = tb1->page_next[n1];
840 break;
841 }
842 ptb = &tb1->page_next[n1];
843 }
844}
845
846static inline void tb_jmp_remove(TranslationBlock *tb, int n)
847{
848 TranslationBlock *tb1, **ptb;
849 unsigned int n1;
850
851 ptb = &tb->jmp_next[n];
852 tb1 = *ptb;
853 if (tb1) {
854 /* find tb(n) in circular list */
855 for(;;) {
856 tb1 = *ptb;
857 n1 = (long)tb1 & 3;
858 tb1 = (TranslationBlock *)((long)tb1 & ~3);
859 if (n1 == n && tb1 == tb)
860 break;
861 if (n1 == 2) {
862 ptb = &tb1->jmp_first;
863 } else {
864 ptb = &tb1->jmp_next[n1];
865 }
866 }
867 /* now we can suppress tb(n) from the list */
868 *ptb = tb->jmp_next[n];
869
870 tb->jmp_next[n] = NULL;
871 }
872}
873
874/* reset the jump entry 'n' of a TB so that it is not chained to
875 another TB */
876static inline void tb_reset_jump(TranslationBlock *tb, int n)
877{
878 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
879}
880
881void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
882{
883 CPUState *env;
884 PageDesc *p;
885 unsigned int h, n1;
886 tb_page_addr_t phys_pc;
887 TranslationBlock *tb1, *tb2;
888
889 /* remove the TB from the hash list */
890 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
891 h = tb_phys_hash_func(phys_pc);
892 tb_remove(&tb_phys_hash[h], tb,
893 offsetof(TranslationBlock, phys_hash_next));
894
895 /* remove the TB from the page list */
896 if (tb->page_addr[0] != page_addr) {
897 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
898 tb_page_remove(&p->first_tb, tb);
899 invalidate_page_bitmap(p);
900 }
901 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
902 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
903 tb_page_remove(&p->first_tb, tb);
904 invalidate_page_bitmap(p);
905 }
906
907 tb_invalidated_flag = 1;
908
909 /* remove the TB from the hash list */
910 h = tb_jmp_cache_hash_func(tb->pc);
911 for(env = first_cpu; env != NULL; env = env->next_cpu) {
912 if (env->tb_jmp_cache[h] == tb)
913 env->tb_jmp_cache[h] = NULL;
914 }
915
916 /* suppress this TB from the two jump lists */
917 tb_jmp_remove(tb, 0);
918 tb_jmp_remove(tb, 1);
919
920 /* suppress any remaining jumps to this TB */
921 tb1 = tb->jmp_first;
922 for(;;) {
923 n1 = (long)tb1 & 3;
924 if (n1 == 2)
925 break;
926 tb1 = (TranslationBlock *)((long)tb1 & ~3);
927 tb2 = tb1->jmp_next[n1];
928 tb_reset_jump(tb1, n1);
929 tb1->jmp_next[n1] = NULL;
930 tb1 = tb2;
931 }
932 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
933
934 tb_phys_invalidate_count++;
935}
936
937static inline void set_bits(uint8_t *tab, int start, int len)
938{
939 int end, mask, end1;
940
941 end = start + len;
942 tab += start >> 3;
943 mask = 0xff << (start & 7);
944 if ((start & ~7) == (end & ~7)) {
945 if (start < end) {
946 mask &= ~(0xff << (end & 7));
947 *tab |= mask;
948 }
949 } else {
950 *tab++ |= mask;
951 start = (start + 8) & ~7;
952 end1 = end & ~7;
953 while (start < end1) {
954 *tab++ = 0xff;
955 start += 8;
956 }
957 if (start < end) {
958 mask = ~(0xff << (end & 7));
959 *tab |= mask;
960 }
961 }
962}
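/* Example: set_bits(tab, 5, 7) marks bits 5..11, i.e. it ORs 0xe0 into
   tab[0] (bits 5-7) and 0x0f into tab[1] (bits 8-11).  build_page_bitmap()
   below uses this to flag, one bit per byte, which parts of a page are
   covered by translated code. */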
963
964static void build_page_bitmap(PageDesc *p)
965{
966 int n, tb_start, tb_end;
967 TranslationBlock *tb;
968
969 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
970
971 tb = p->first_tb;
972 while (tb != NULL) {
973 n = (long)tb & 3;
974 tb = (TranslationBlock *)((long)tb & ~3);
975 /* NOTE: this is subtle as a TB may span two physical pages */
976 if (n == 0) {
977 /* NOTE: tb_end may be after the end of the page, but
978 it is not a problem */
979 tb_start = tb->pc & ~TARGET_PAGE_MASK;
980 tb_end = tb_start + tb->size;
981 if (tb_end > TARGET_PAGE_SIZE)
982 tb_end = TARGET_PAGE_SIZE;
983 } else {
984 tb_start = 0;
985 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
986 }
987 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
988 tb = tb->page_next[n];
989 }
990}
991
992TranslationBlock *tb_gen_code(CPUState *env,
993 target_ulong pc, target_ulong cs_base,
994 int flags, int cflags)
995{
996 TranslationBlock *tb;
997 uint8_t *tc_ptr;
998 tb_page_addr_t phys_pc, phys_page2;
999 target_ulong virt_page2;
1000 int code_gen_size;
1001
1002 phys_pc = get_page_addr_code(env, pc);
1003 tb = tb_alloc(pc);
1004 if (!tb) {
1005 /* flush must be done */
1006 tb_flush(env);
1007 /* cannot fail at this point */
1008 tb = tb_alloc(pc);
1009 /* Don't forget to invalidate previous TB info. */
1010 tb_invalidated_flag = 1;
1011 }
1012 tc_ptr = code_gen_ptr;
1013 tb->tc_ptr = tc_ptr;
1014 tb->cs_base = cs_base;
1015 tb->flags = flags;
1016 tb->cflags = cflags;
1017 cpu_gen_code(env, tb, &code_gen_size);
1018 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1019
1020 /* check next page if needed */
1021 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1022 phys_page2 = -1;
1023 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1024 phys_page2 = get_page_addr_code(env, virt_page2);
1025 }
1026 tb_link_page(tb, phys_pc, phys_page2);
1027 return tb;
1028}
1029
 1030/* invalidate all TBs which intersect with the target physical address
 1031 range [start;end[. NOTE: start and end must refer to
1032 the same physical page. 'is_cpu_write_access' should be true if called
1033 from a real cpu write access: the virtual CPU will exit the current
1034 TB if code is modified inside this TB. */
1035void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1036 int is_cpu_write_access)
1037{
1038 TranslationBlock *tb, *tb_next, *saved_tb;
1039 CPUState *env = cpu_single_env;
1040 tb_page_addr_t tb_start, tb_end;
1041 PageDesc *p;
1042 int n;
1043#ifdef TARGET_HAS_PRECISE_SMC
1044 int current_tb_not_found = is_cpu_write_access;
1045 TranslationBlock *current_tb = NULL;
1046 int current_tb_modified = 0;
1047 target_ulong current_pc = 0;
1048 target_ulong current_cs_base = 0;
1049 int current_flags = 0;
1050#endif /* TARGET_HAS_PRECISE_SMC */
1051
1052 p = page_find(start >> TARGET_PAGE_BITS);
1053 if (!p)
1054 return;
1055 if (!p->code_bitmap &&
1056 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1057 is_cpu_write_access) {
1058 /* build code bitmap */
1059 build_page_bitmap(p);
1060 }
1061
1062 /* we remove all the TBs in the range [start, end[ */
1063 /* XXX: see if in some cases it could be faster to invalidate all the code */
1064 tb = p->first_tb;
1065 while (tb != NULL) {
1066 n = (long)tb & 3;
1067 tb = (TranslationBlock *)((long)tb & ~3);
1068 tb_next = tb->page_next[n];
1069 /* NOTE: this is subtle as a TB may span two physical pages */
1070 if (n == 0) {
1071 /* NOTE: tb_end may be after the end of the page, but
1072 it is not a problem */
1073 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1074 tb_end = tb_start + tb->size;
1075 } else {
1076 tb_start = tb->page_addr[1];
1077 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1078 }
1079 if (!(tb_end <= start || tb_start >= end)) {
1080#ifdef TARGET_HAS_PRECISE_SMC
1081 if (current_tb_not_found) {
1082 current_tb_not_found = 0;
1083 current_tb = NULL;
1084 if (env->mem_io_pc) {
1085 /* now we have a real cpu fault */
1086 current_tb = tb_find_pc(env->mem_io_pc);
1087 }
1088 }
1089 if (current_tb == tb &&
1090 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1091 /* If we are modifying the current TB, we must stop
1092 its execution. We could be more precise by checking
1093 that the modification is after the current PC, but it
1094 would require a specialized function to partially
1095 restore the CPU state */
1096
1097 current_tb_modified = 1;
1098 cpu_restore_state(current_tb, env, env->mem_io_pc);
1099 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1100 &current_flags);
1101 }
1102#endif /* TARGET_HAS_PRECISE_SMC */
1103 /* we need to do that to handle the case where a signal
1104 occurs while doing tb_phys_invalidate() */
1105 saved_tb = NULL;
1106 if (env) {
1107 saved_tb = env->current_tb;
1108 env->current_tb = NULL;
1109 }
1110 tb_phys_invalidate(tb, -1);
1111 if (env) {
1112 env->current_tb = saved_tb;
1113 if (env->interrupt_request && env->current_tb)
1114 cpu_interrupt(env, env->interrupt_request);
1115 }
1116 }
1117 tb = tb_next;
1118 }
1119#if !defined(CONFIG_USER_ONLY)
 1120 /* if no code remains, there is no need to keep using slow writes */
1121 if (!p->first_tb) {
1122 invalidate_page_bitmap(p);
1123 if (is_cpu_write_access) {
1124 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1125 }
1126 }
1127#endif
1128#ifdef TARGET_HAS_PRECISE_SMC
1129 if (current_tb_modified) {
1130 /* we generate a block containing just the instruction
1131 modifying the memory. It will ensure that it cannot modify
1132 itself */
1133 env->current_tb = NULL;
1134 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1135 cpu_resume_from_signal(env, NULL);
1136 }
1137#endif
1138}
1139
1140/* len must be <= 8 and start must be a multiple of len */
1141static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1142{
1143 PageDesc *p;
1144 int offset, b;
1145#if 0
1146 if (1) {
1147 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1148 cpu_single_env->mem_io_vaddr, len,
1149 cpu_single_env->eip,
1150 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1151 }
1152#endif
1153 p = page_find(start >> TARGET_PAGE_BITS);
1154 if (!p)
1155 return;
1156 if (p->code_bitmap) {
1157 offset = start & ~TARGET_PAGE_MASK;
1158 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1159 if (b & ((1 << len) - 1))
1160 goto do_invalidate;
1161 } else {
1162 do_invalidate:
1163 tb_invalidate_phys_page_range(start, start + len, 1);
1164 }
1165}
1166
1167#if !defined(CONFIG_SOFTMMU)
1168static void tb_invalidate_phys_page(tb_page_addr_t addr,
1169 unsigned long pc, void *puc)
1170{
1171 TranslationBlock *tb;
1172 PageDesc *p;
1173 int n;
1174#ifdef TARGET_HAS_PRECISE_SMC
1175 TranslationBlock *current_tb = NULL;
1176 CPUState *env = cpu_single_env;
1177 int current_tb_modified = 0;
1178 target_ulong current_pc = 0;
1179 target_ulong current_cs_base = 0;
1180 int current_flags = 0;
1181#endif
1182
1183 addr &= TARGET_PAGE_MASK;
1184 p = page_find(addr >> TARGET_PAGE_BITS);
1185 if (!p)
1186 return;
1187 tb = p->first_tb;
1188#ifdef TARGET_HAS_PRECISE_SMC
1189 if (tb && pc != 0) {
1190 current_tb = tb_find_pc(pc);
1191 }
1192#endif
1193 while (tb != NULL) {
1194 n = (long)tb & 3;
1195 tb = (TranslationBlock *)((long)tb & ~3);
1196#ifdef TARGET_HAS_PRECISE_SMC
1197 if (current_tb == tb &&
1198 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1199 /* If we are modifying the current TB, we must stop
1200 its execution. We could be more precise by checking
1201 that the modification is after the current PC, but it
1202 would require a specialized function to partially
1203 restore the CPU state */
1204
1205 current_tb_modified = 1;
1206 cpu_restore_state(current_tb, env, pc);
1207 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1208 &current_flags);
1209 }
1210#endif /* TARGET_HAS_PRECISE_SMC */
1211 tb_phys_invalidate(tb, addr);
1212 tb = tb->page_next[n];
1213 }
1214 p->first_tb = NULL;
1215#ifdef TARGET_HAS_PRECISE_SMC
1216 if (current_tb_modified) {
1217 /* we generate a block containing just the instruction
1218 modifying the memory. It will ensure that it cannot modify
1219 itself */
1220 env->current_tb = NULL;
1221 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1222 cpu_resume_from_signal(env, puc);
1223 }
1224#endif
1225}
1226#endif
1227
 1228/* add the tb to the target page and protect it if necessary */
1229static inline void tb_alloc_page(TranslationBlock *tb,
1230 unsigned int n, tb_page_addr_t page_addr)
1231{
1232 PageDesc *p;
1233#ifndef CONFIG_USER_ONLY
1234 bool page_already_protected;
1235#endif
1236
1237 tb->page_addr[n] = page_addr;
1238 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1239 tb->page_next[n] = p->first_tb;
1240#ifndef CONFIG_USER_ONLY
1241 page_already_protected = p->first_tb != NULL;
1242#endif
1243 p->first_tb = (TranslationBlock *)((long)tb | n);
1244 invalidate_page_bitmap(p);
1245
1246#if defined(TARGET_HAS_SMC) || 1
1247
1248#if defined(CONFIG_USER_ONLY)
1249 if (p->flags & PAGE_WRITE) {
1250 target_ulong addr;
1251 PageDesc *p2;
1252 int prot;
1253
 1254 /* force the host page to be non-writable (writes will have a
1255 page fault + mprotect overhead) */
1256 page_addr &= qemu_host_page_mask;
1257 prot = 0;
1258 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1259 addr += TARGET_PAGE_SIZE) {
1260
1261 p2 = page_find (addr >> TARGET_PAGE_BITS);
1262 if (!p2)
1263 continue;
1264 prot |= p2->flags;
1265 p2->flags &= ~PAGE_WRITE;
1266 }
1267 mprotect(g2h(page_addr), qemu_host_page_size,
1268 (prot & PAGE_BITS) & ~PAGE_WRITE);
1269#ifdef DEBUG_TB_INVALIDATE
1270 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1271 page_addr);
1272#endif
1273 }
1274#else
1275 /* if some code is already present, then the pages are already
1276 protected. So we handle the case where only the first TB is
1277 allocated in a physical page */
1278 if (!page_already_protected) {
1279 tlb_protect_code(page_addr);
1280 }
1281#endif
1282
1283#endif /* TARGET_HAS_SMC */
1284}
1285
1286/* add a new TB and link it to the physical page tables. phys_page2 is
1287 (-1) to indicate that only one page contains the TB. */
1288void tb_link_page(TranslationBlock *tb,
1289 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1290{
1291 unsigned int h;
1292 TranslationBlock **ptb;
1293
1294 /* Grab the mmap lock to stop another thread invalidating this TB
1295 before we are done. */
1296 mmap_lock();
1297 /* add in the physical hash table */
1298 h = tb_phys_hash_func(phys_pc);
1299 ptb = &tb_phys_hash[h];
1300 tb->phys_hash_next = *ptb;
1301 *ptb = tb;
1302
1303 /* add in the page list */
1304 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1305 if (phys_page2 != -1)
1306 tb_alloc_page(tb, 1, phys_page2);
1307 else
1308 tb->page_addr[1] = -1;
1309
1310 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1311 tb->jmp_next[0] = NULL;
1312 tb->jmp_next[1] = NULL;
1313
1314 /* init original jump addresses */
1315 if (tb->tb_next_offset[0] != 0xffff)
1316 tb_reset_jump(tb, 0);
1317 if (tb->tb_next_offset[1] != 0xffff)
1318 tb_reset_jump(tb, 1);
1319
1320#ifdef DEBUG_TB_CHECK
1321 tb_page_check();
1322#endif
1323 mmap_unlock();
1324}
1325
1326/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1327 tb[1].tc_ptr. Return NULL if not found */
1328TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1329{
1330 int m_min, m_max, m;
1331 unsigned long v;
1332 TranslationBlock *tb;
1333
1334 if (nb_tbs <= 0)
1335 return NULL;
1336 if (tc_ptr < (unsigned long)code_gen_buffer ||
1337 tc_ptr >= (unsigned long)code_gen_ptr)
1338 return NULL;
1339 /* binary search (cf Knuth) */
1340 m_min = 0;
1341 m_max = nb_tbs - 1;
1342 while (m_min <= m_max) {
1343 m = (m_min + m_max) >> 1;
1344 tb = &tbs[m];
1345 v = (unsigned long)tb->tc_ptr;
1346 if (v == tc_ptr)
1347 return tb;
1348 else if (tc_ptr < v) {
1349 m_max = m - 1;
1350 } else {
1351 m_min = m + 1;
1352 }
1353 }
1354 return &tbs[m_max];
1355}
1356
1357static void tb_reset_jump_recursive(TranslationBlock *tb);
1358
1359static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1360{
1361 TranslationBlock *tb1, *tb_next, **ptb;
1362 unsigned int n1;
1363
1364 tb1 = tb->jmp_next[n];
1365 if (tb1 != NULL) {
1366 /* find head of list */
1367 for(;;) {
1368 n1 = (long)tb1 & 3;
1369 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1370 if (n1 == 2)
1371 break;
1372 tb1 = tb1->jmp_next[n1];
1373 }
 1374 /* we are now sure that tb jumps to tb1 */
1375 tb_next = tb1;
1376
1377 /* remove tb from the jmp_first list */
1378 ptb = &tb_next->jmp_first;
1379 for(;;) {
1380 tb1 = *ptb;
1381 n1 = (long)tb1 & 3;
1382 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1383 if (n1 == n && tb1 == tb)
1384 break;
1385 ptb = &tb1->jmp_next[n1];
1386 }
1387 *ptb = tb->jmp_next[n];
1388 tb->jmp_next[n] = NULL;
1389
1390 /* suppress the jump to next tb in generated code */
1391 tb_reset_jump(tb, n);
1392
1393 /* suppress jumps in the tb on which we could have jumped */
1394 tb_reset_jump_recursive(tb_next);
1395 }
1396}
1397
1398static void tb_reset_jump_recursive(TranslationBlock *tb)
1399{
1400 tb_reset_jump_recursive2(tb, 0);
1401 tb_reset_jump_recursive2(tb, 1);
1402}
1403
1404#if defined(TARGET_HAS_ICE)
1405#if defined(CONFIG_USER_ONLY)
1406static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1407{
1408 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1409}
1410#else
1411static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1412{
1413 target_phys_addr_t addr;
1414 target_ulong pd;
1415 ram_addr_t ram_addr;
1416 PhysPageDesc p;
1417
1418 addr = cpu_get_phys_page_debug(env, pc);
1419 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1420 pd = p.phys_offset;
1421 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1422 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1423}
1424#endif
1425#endif /* TARGET_HAS_ICE */
1426
1427#if defined(CONFIG_USER_ONLY)
1428void cpu_watchpoint_remove_all(CPUState *env, int mask)
1429
1430{
1431}
1432
1433int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1434 int flags, CPUWatchpoint **watchpoint)
1435{
1436 return -ENOSYS;
1437}
1438#else
1439/* Add a watchpoint. */
1440int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1441 int flags, CPUWatchpoint **watchpoint)
1442{
1443 target_ulong len_mask = ~(len - 1);
1444 CPUWatchpoint *wp;
1445
1446 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1447 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1448 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1449 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1450 return -EINVAL;
1451 }
1452 wp = g_malloc(sizeof(*wp));
1453
1454 wp->vaddr = addr;
1455 wp->len_mask = len_mask;
1456 wp->flags = flags;
1457
1458 /* keep all GDB-injected watchpoints in front */
1459 if (flags & BP_GDB)
1460 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1461 else
1462 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1463
1464 tlb_flush_page(env, addr);
1465
1466 if (watchpoint)
1467 *watchpoint = wp;
1468 return 0;
1469}
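/* For example, a debugger front end watching 4 bytes at 'addr' for writes
   would call (flag combination shown for illustration only):

       CPUWatchpoint *wp;
       cpu_watchpoint_insert(env, addr, 4, BP_GDB | BP_MEM_WRITE, &wp);
*/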
1470
1471/* Remove a specific watchpoint. */
1472int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1473 int flags)
1474{
1475 target_ulong len_mask = ~(len - 1);
1476 CPUWatchpoint *wp;
1477
1478 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1479 if (addr == wp->vaddr && len_mask == wp->len_mask
1480 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1481 cpu_watchpoint_remove_by_ref(env, wp);
1482 return 0;
1483 }
1484 }
1485 return -ENOENT;
1486}
1487
1488/* Remove a specific watchpoint by reference. */
1489void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1490{
1491 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1492
1493 tlb_flush_page(env, watchpoint->vaddr);
1494
1495 g_free(watchpoint);
1496}
1497
1498/* Remove all matching watchpoints. */
1499void cpu_watchpoint_remove_all(CPUState *env, int mask)
1500{
1501 CPUWatchpoint *wp, *next;
1502
1503 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1504 if (wp->flags & mask)
1505 cpu_watchpoint_remove_by_ref(env, wp);
1506 }
1507}
1508#endif
1509
1510/* Add a breakpoint. */
1511int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1512 CPUBreakpoint **breakpoint)
1513{
1514#if defined(TARGET_HAS_ICE)
1515 CPUBreakpoint *bp;
1516
1517 bp = g_malloc(sizeof(*bp));
1518
1519 bp->pc = pc;
1520 bp->flags = flags;
1521
1522 /* keep all GDB-injected breakpoints in front */
1523 if (flags & BP_GDB)
1524 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1525 else
1526 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1527
1528 breakpoint_invalidate(env, pc);
1529
1530 if (breakpoint)
1531 *breakpoint = bp;
1532 return 0;
1533#else
1534 return -ENOSYS;
1535#endif
1536}
1537
1538/* Remove a specific breakpoint. */
1539int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1540{
1541#if defined(TARGET_HAS_ICE)
1542 CPUBreakpoint *bp;
1543
1544 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1545 if (bp->pc == pc && bp->flags == flags) {
1546 cpu_breakpoint_remove_by_ref(env, bp);
1547 return 0;
1548 }
1549 }
1550 return -ENOENT;
1551#else
1552 return -ENOSYS;
1553#endif
1554}
1555
1556/* Remove a specific breakpoint by reference. */
1557void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1558{
1559#if defined(TARGET_HAS_ICE)
1560 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1561
1562 breakpoint_invalidate(env, breakpoint->pc);
1563
1564 g_free(breakpoint);
1565#endif
1566}
1567
1568/* Remove all matching breakpoints. */
1569void cpu_breakpoint_remove_all(CPUState *env, int mask)
1570{
1571#if defined(TARGET_HAS_ICE)
1572 CPUBreakpoint *bp, *next;
1573
1574 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1575 if (bp->flags & mask)
1576 cpu_breakpoint_remove_by_ref(env, bp);
1577 }
1578#endif
1579}
1580
1581/* enable or disable single step mode. EXCP_DEBUG is returned by the
1582 CPU loop after each instruction */
1583void cpu_single_step(CPUState *env, int enabled)
1584{
1585#if defined(TARGET_HAS_ICE)
1586 if (env->singlestep_enabled != enabled) {
1587 env->singlestep_enabled = enabled;
1588 if (kvm_enabled())
1589 kvm_update_guest_debug(env, 0);
1590 else {
1591 /* must flush all the translated code to avoid inconsistencies */
1592 /* XXX: only flush what is necessary */
1593 tb_flush(env);
1594 }
1595 }
1596#endif
1597}
1598
 1599/* enable or disable low-level logging */
1600void cpu_set_log(int log_flags)
1601{
1602 loglevel = log_flags;
1603 if (loglevel && !logfile) {
1604 logfile = fopen(logfilename, log_append ? "a" : "w");
1605 if (!logfile) {
1606 perror(logfilename);
1607 _exit(1);
1608 }
1609#if !defined(CONFIG_SOFTMMU)
1610 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1611 {
1612 static char logfile_buf[4096];
1613 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1614 }
1615#elif defined(_WIN32)
1616 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1617 setvbuf(logfile, NULL, _IONBF, 0);
1618#else
1619 setvbuf(logfile, NULL, _IOLBF, 0);
1620#endif
1621 log_append = 1;
1622 }
1623 if (!loglevel && logfile) {
1624 fclose(logfile);
1625 logfile = NULL;
1626 }
1627}
1628
1629void cpu_set_log_filename(const char *filename)
1630{
1631 logfilename = strdup(filename);
1632 if (logfile) {
1633 fclose(logfile);
1634 logfile = NULL;
1635 }
1636 cpu_set_log(loglevel);
1637}
1638
1639static void cpu_unlink_tb(CPUState *env)
1640{
1641 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1642 problem and hope the cpu will stop of its own accord. For userspace
1643 emulation this often isn't actually as bad as it sounds. Often
1644 signals are used primarily to interrupt blocking syscalls. */
1645 TranslationBlock *tb;
1646 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1647
1648 spin_lock(&interrupt_lock);
1649 tb = env->current_tb;
1650 /* if the cpu is currently executing code, we must unlink it and
1651 all the potentially executing TB */
1652 if (tb) {
1653 env->current_tb = NULL;
1654 tb_reset_jump_recursive(tb);
1655 }
1656 spin_unlock(&interrupt_lock);
1657}
1658
1659#ifndef CONFIG_USER_ONLY
 1660/* mask must never be zero, except for the A20 change call */
1661static void tcg_handle_interrupt(CPUState *env, int mask)
1662{
1663 int old_mask;
1664
1665 old_mask = env->interrupt_request;
1666 env->interrupt_request |= mask;
1667
1668 /*
1669 * If called from iothread context, wake the target cpu in
 1670 * case it's halted.
1671 */
1672 if (!qemu_cpu_is_self(env)) {
1673 qemu_cpu_kick(env);
1674 return;
1675 }
1676
1677 if (use_icount) {
1678 env->icount_decr.u16.high = 0xffff;
1679 if (!can_do_io(env)
1680 && (mask & ~old_mask) != 0) {
1681 cpu_abort(env, "Raised interrupt while not in I/O function");
1682 }
1683 } else {
1684 cpu_unlink_tb(env);
1685 }
1686}
1687
1688CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1689
1690#else /* CONFIG_USER_ONLY */
1691
1692void cpu_interrupt(CPUState *env, int mask)
1693{
1694 env->interrupt_request |= mask;
1695 cpu_unlink_tb(env);
1696}
1697#endif /* CONFIG_USER_ONLY */
1698
1699void cpu_reset_interrupt(CPUState *env, int mask)
1700{
1701 env->interrupt_request &= ~mask;
1702}
1703
1704void cpu_exit(CPUState *env)
1705{
1706 env->exit_request = 1;
1707 cpu_unlink_tb(env);
1708}
1709
1710const CPULogItem cpu_log_items[] = {
1711 { CPU_LOG_TB_OUT_ASM, "out_asm",
1712 "show generated host assembly code for each compiled TB" },
1713 { CPU_LOG_TB_IN_ASM, "in_asm",
1714 "show target assembly code for each compiled TB" },
1715 { CPU_LOG_TB_OP, "op",
1716 "show micro ops for each compiled TB" },
1717 { CPU_LOG_TB_OP_OPT, "op_opt",
1718 "show micro ops "
1719#ifdef TARGET_I386
1720 "before eflags optimization and "
1721#endif
1722 "after liveness analysis" },
1723 { CPU_LOG_INT, "int",
1724 "show interrupts/exceptions in short format" },
1725 { CPU_LOG_EXEC, "exec",
1726 "show trace before each executed TB (lots of logs)" },
1727 { CPU_LOG_TB_CPU, "cpu",
1728 "show CPU state before block translation" },
1729#ifdef TARGET_I386
1730 { CPU_LOG_PCALL, "pcall",
1731 "show protected mode far calls/returns/exceptions" },
1732 { CPU_LOG_RESET, "cpu_reset",
1733 "show CPU state before CPU resets" },
1734#endif
1735#ifdef DEBUG_IOPORT
1736 { CPU_LOG_IOPORT, "ioport",
1737 "show all i/o ports accesses" },
1738#endif
1739 { 0, NULL, NULL },
1740};
1741
1742static int cmp1(const char *s1, int n, const char *s2)
1743{
1744 if (strlen(s2) != n)
1745 return 0;
1746 return memcmp(s1, s2, n) == 0;
1747}
1748
 1749/* takes a comma-separated list of log masks. Returns 0 on error. */
1750int cpu_str_to_log_mask(const char *str)
1751{
1752 const CPULogItem *item;
1753 int mask;
1754 const char *p, *p1;
1755
1756 p = str;
1757 mask = 0;
1758 for(;;) {
1759 p1 = strchr(p, ',');
1760 if (!p1)
1761 p1 = p + strlen(p);
1762 if(cmp1(p,p1-p,"all")) {
1763 for(item = cpu_log_items; item->mask != 0; item++) {
1764 mask |= item->mask;
1765 }
1766 } else {
1767 for(item = cpu_log_items; item->mask != 0; item++) {
1768 if (cmp1(p, p1 - p, item->name))
1769 goto found;
1770 }
1771 return 0;
1772 }
1773 found:
1774 mask |= item->mask;
1775 if (*p1 != ',')
1776 break;
1777 p = p1 + 1;
1778 }
1779 return mask;
1780}
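/* For example, cpu_str_to_log_mask("in_asm,int") yields
   CPU_LOG_TB_IN_ASM | CPU_LOG_INT, while "all" enables every entry of
   cpu_log_items[]. */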
1781
1782void cpu_abort(CPUState *env, const char *fmt, ...)
1783{
1784 va_list ap;
1785 va_list ap2;
1786
1787 va_start(ap, fmt);
1788 va_copy(ap2, ap);
1789 fprintf(stderr, "qemu: fatal: ");
1790 vfprintf(stderr, fmt, ap);
1791 fprintf(stderr, "\n");
1792#ifdef TARGET_I386
1793 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1794#else
1795 cpu_dump_state(env, stderr, fprintf, 0);
1796#endif
1797 if (qemu_log_enabled()) {
1798 qemu_log("qemu: fatal: ");
1799 qemu_log_vprintf(fmt, ap2);
1800 qemu_log("\n");
1801#ifdef TARGET_I386
1802 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1803#else
1804 log_cpu_state(env, 0);
1805#endif
1806 qemu_log_flush();
1807 qemu_log_close();
1808 }
1809 va_end(ap2);
1810 va_end(ap);
1811#if defined(CONFIG_USER_ONLY)
1812 {
1813 struct sigaction act;
1814 sigfillset(&act.sa_mask);
1815 act.sa_handler = SIG_DFL;
1816 sigaction(SIGABRT, &act, NULL);
1817 }
1818#endif
1819 abort();
1820}
1821
1822CPUState *cpu_copy(CPUState *env)
1823{
1824 CPUState *new_env = cpu_init(env->cpu_model_str);
1825 CPUState *next_cpu = new_env->next_cpu;
1826 int cpu_index = new_env->cpu_index;
1827#if defined(TARGET_HAS_ICE)
1828 CPUBreakpoint *bp;
1829 CPUWatchpoint *wp;
1830#endif
1831
1832 memcpy(new_env, env, sizeof(CPUState));
1833
1834 /* Preserve chaining and index. */
1835 new_env->next_cpu = next_cpu;
1836 new_env->cpu_index = cpu_index;
1837
1838 /* Clone all break/watchpoints.
1839 Note: Once we support ptrace with hw-debug register access, make sure
1840 BP_CPU break/watchpoints are handled correctly on clone. */
1841 QTAILQ_INIT(&env->breakpoints);
1842 QTAILQ_INIT(&env->watchpoints);
1843#if defined(TARGET_HAS_ICE)
1844 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1845 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1846 }
1847 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1848 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1849 wp->flags, NULL);
1850 }
1851#endif
1852
1853 return new_env;
1854}
1855
1856#if !defined(CONFIG_USER_ONLY)
1857
1858static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1859{
1860 unsigned int i;
1861
1862 /* Discard jump cache entries for any tb which might potentially
1863 overlap the flushed page. */
1864 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1865 memset (&env->tb_jmp_cache[i], 0,
1866 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1867
1868 i = tb_jmp_cache_hash_page(addr);
1869 memset (&env->tb_jmp_cache[i], 0,
1870 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1871}
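/* The hash page at addr - TARGET_PAGE_SIZE is cleared as well because a TB
   starting on the preceding page may extend into the flushed page. */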
1872
1873static CPUTLBEntry s_cputlb_empty_entry = {
1874 .addr_read = -1,
1875 .addr_write = -1,
1876 .addr_code = -1,
1877 .addend = -1,
1878};
1879
1880/* NOTE: if flush_global is true, also flush global entries (not
1881 implemented yet) */
1882void tlb_flush(CPUState *env, int flush_global)
1883{
1884 int i;
1885
1886#if defined(DEBUG_TLB)
1887 printf("tlb_flush:\n");
1888#endif
1889 /* must reset current TB so that interrupts cannot modify the
1890 links while we are modifying them */
1891 env->current_tb = NULL;
1892
1893 for(i = 0; i < CPU_TLB_SIZE; i++) {
1894 int mmu_idx;
1895 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1896 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1897 }
1898 }
1899
1900 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1901
1902 env->tlb_flush_addr = -1;
1903 env->tlb_flush_mask = 0;
1904 tlb_flush_count++;
1905}
1906
1907static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1908{
1909 if (addr == (tlb_entry->addr_read &
1910 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1911 addr == (tlb_entry->addr_write &
1912 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1913 addr == (tlb_entry->addr_code &
1914 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1915 *tlb_entry = s_cputlb_empty_entry;
1916 }
1917}
1918
1919void tlb_flush_page(CPUState *env, target_ulong addr)
1920{
1921 int i;
1922 int mmu_idx;
1923
1924#if defined(DEBUG_TLB)
1925 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
1926#endif
1927 /* Check if we need to flush due to large pages. */
1928 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
1929#if defined(DEBUG_TLB)
1930 printf("tlb_flush_page: forced full flush ("
1931 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
1932 env->tlb_flush_addr, env->tlb_flush_mask);
1933#endif
1934 tlb_flush(env, 1);
1935 return;
1936 }
1937 /* must reset current TB so that interrupts cannot modify the
1938 links while we are modifying them */
1939 env->current_tb = NULL;
1940
1941 addr &= TARGET_PAGE_MASK;
1942 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
1943 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
1944 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
1945
1946 tlb_flush_jmp_cache(env, addr);
1947}
1948
1949/* update the TLBs so that writes to code in the virtual page 'addr'
1950 can be detected */
1951static void tlb_protect_code(ram_addr_t ram_addr)
1952{
1953 cpu_physical_memory_reset_dirty(ram_addr,
1954 ram_addr + TARGET_PAGE_SIZE,
1955 CODE_DIRTY_FLAG);
1956}
1957
1958/* update the TLB so that writes in physical page 'phys_addr' are no longer
1959 tested for self modifying code */
1960static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
1961 target_ulong vaddr)
1962{
1963 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
1964}
1965
1966static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
1967 unsigned long start, unsigned long length)
1968{
1969 unsigned long addr;
1970 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
1971 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
1972 if ((addr - start) < length) {
1973 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
1974 }
1975 }
1976}
1977
1978/* Note: start and end must be within the same ram block. */
1979void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1980 int dirty_flags)
1981{
1982 CPUState *env;
1983 unsigned long length, start1;
1984 int i;
1985
1986 start &= TARGET_PAGE_MASK;
1987 end = TARGET_PAGE_ALIGN(end);
1988
1989 length = end - start;
1990 if (length == 0)
1991 return;
1992 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1993
1994 /* we modify the TLB cache so that the dirty bit will be set again
1995 when accessing the range */
1996 start1 = (unsigned long)qemu_safe_ram_ptr(start);
1997 /* Check that we don't span multiple blocks - this breaks the
1998 address comparisons below. */
1999 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2000 != (end - 1) - start) {
2001 abort();
2002 }
2003
2004 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2005 int mmu_idx;
2006 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2007 for(i = 0; i < CPU_TLB_SIZE; i++)
2008 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2009 start1, length);
2010 }
2011 }
2012}
2013
2014int cpu_physical_memory_set_dirty_tracking(int enable)
2015{
2016 int ret = 0;
2017 in_migration = enable;
2018 return ret;
2019}
2020
2021static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2022{
2023 ram_addr_t ram_addr;
2024 void *p;
2025
2026 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
2027 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2028 + tlb_entry->addend);
2029 ram_addr = qemu_ram_addr_from_host_nofail(p);
2030 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2031 tlb_entry->addr_write |= TLB_NOTDIRTY;
2032 }
2033 }
2034}
2035
2036/* update the TLB according to the current state of the dirty bits */
2037void cpu_tlb_update_dirty(CPUState *env)
2038{
2039 int i;
2040 int mmu_idx;
2041 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2042 for(i = 0; i < CPU_TLB_SIZE; i++)
2043 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2044 }
2045}
2046
2047static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2048{
2049 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2050 tlb_entry->addr_write = vaddr;
2051}
2052
2053/* update the TLB corresponding to virtual page vaddr
2054 so that it is no longer dirty */
2055static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2056{
2057 int i;
2058 int mmu_idx;
2059
2060 vaddr &= TARGET_PAGE_MASK;
2061 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2062 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2063 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2064}
2065
2066/* Our TLB does not support large pages, so remember the area covered by
2067 large pages and trigger a full TLB flush if these are invalidated. */
2068static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2069 target_ulong size)
2070{
2071 target_ulong mask = ~(size - 1);
2072
2073 if (env->tlb_flush_addr == (target_ulong)-1) {
2074 env->tlb_flush_addr = vaddr & mask;
2075 env->tlb_flush_mask = mask;
2076 return;
2077 }
2078 /* Extend the existing region to include the new page.
2079 This is a compromise between unnecessary flushes and the cost
2080 of maintaining a full variable size TLB. */
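 /* For example (hypothetical values): an existing 2 MiB region at
    0x40000000 and a new 2 MiB page at 0x40400000 make the loop below
    widen the mask from 2 MiB to 8 MiB, so both pages share one aligned
    window starting at 0x40000000. */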
2081 mask &= env->tlb_flush_mask;
2082 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2083 mask <<= 1;
2084 }
2085 env->tlb_flush_addr &= mask;
2086 env->tlb_flush_mask = mask;
2087}
2088
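/* True if the phys_offset denotes plain RAM or ROM rather than an MMIO
   region. */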
2089static bool is_ram_rom(ram_addr_t pd)
2090{
2091 pd &= ~TARGET_PAGE_MASK;
2092 return pd == io_mem_ram.ram_addr || pd == io_mem_rom.ram_addr;
2093}
2094
2095static bool is_ram_rom_romd(ram_addr_t pd)
2096{
2097 return is_ram_rom(pd) || (pd & IO_MEM_ROMD);
2098}
2099
2100/* Add a new TLB entry. At most one entry for a given virtual address
2101 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2102 supplied size is only used by tlb_flush_page. */
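/* A hypothetical call mapping one normal, readable and writable page in
   MMU mode 0 would look like:
       tlb_set_page(env, vaddr, paddr, PAGE_READ | PAGE_WRITE, 0,
                    TARGET_PAGE_SIZE);
   where vaddr and paddr are illustrative, page-aligned addresses. */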
2103void tlb_set_page(CPUState *env, target_ulong vaddr,
2104 target_phys_addr_t paddr, int prot,
2105 int mmu_idx, target_ulong size)
2106{
2107 PhysPageDesc p;
2108 unsigned long pd;
2109 unsigned int index;
2110 target_ulong address;
2111 target_ulong code_address;
2112 unsigned long addend;
2113 CPUTLBEntry *te;
2114 CPUWatchpoint *wp;
2115 target_phys_addr_t iotlb;
2116
2117 assert(size >= TARGET_PAGE_SIZE);
2118 if (size != TARGET_PAGE_SIZE) {
2119 tlb_add_large_page(env, vaddr, size);
2120 }
2121 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2122 pd = p.phys_offset;
2123#if defined(DEBUG_TLB)
2124 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2125 " prot=%x idx=%d pd=0x%08lx\n",
2126 vaddr, paddr, prot, mmu_idx, pd);
2127#endif
2128
2129 address = vaddr;
2130 if (!is_ram_rom_romd(pd)) {
2131 /* IO memory case (romd handled later) */
2132 address |= TLB_MMIO;
2133 }
2134 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2135 if (is_ram_rom(pd)) {
2136 /* Normal RAM. */
2137 iotlb = pd & TARGET_PAGE_MASK;
2138 if ((pd & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr)
2139 iotlb |= io_mem_notdirty.ram_addr;
2140 else
2141 iotlb |= io_mem_rom.ram_addr;
2142 } else {
2143 /* IO handlers are currently passed a physical address.
2144 It would be nice to pass an offset from the base address
2145 of that region. This would avoid having to special case RAM,
2146 and avoid full address decoding in every device.
2147 We can't use the high bits of pd for this because
2148 IO_MEM_ROMD uses these as a ram address. */
2149 iotlb = (pd & ~TARGET_PAGE_MASK);
2150 iotlb += p.region_offset;
2151 }
2152
2153 code_address = address;
2154 /* Make accesses to pages with watchpoints go via the
2155 watchpoint trap routines. */
2156 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2157 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2158 /* Avoid trapping reads of pages with a write breakpoint. */
2159 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2160 iotlb = io_mem_watch + paddr;
2161 address |= TLB_MMIO;
2162 break;
2163 }
2164 }
2165 }
2166
2167 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2168 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2169 te = &env->tlb_table[mmu_idx][index];
2170 te->addend = addend - vaddr;
2171 if (prot & PAGE_READ) {
2172 te->addr_read = address;
2173 } else {
2174 te->addr_read = -1;
2175 }
2176
2177 if (prot & PAGE_EXEC) {
2178 te->addr_code = code_address;
2179 } else {
2180 te->addr_code = -1;
2181 }
2182 if (prot & PAGE_WRITE) {
2183 if ((pd & ~TARGET_PAGE_MASK) == io_mem_rom.ram_addr ||
2184 (pd & IO_MEM_ROMD)) {
2185 /* Write access calls the I/O callback. */
2186 te->addr_write = address | TLB_MMIO;
2187 } else if ((pd & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr &&
2188 !cpu_physical_memory_is_dirty(pd)) {
2189 te->addr_write = address | TLB_NOTDIRTY;
2190 } else {
2191 te->addr_write = address;
2192 }
2193 } else {
2194 te->addr_write = -1;
2195 }
2196}
2197
2198#else
2199
2200void tlb_flush(CPUState *env, int flush_global)
2201{
2202}
2203
2204void tlb_flush_page(CPUState *env, target_ulong addr)
2205{
2206}
2207
2208/*
2209 * Walks guest process memory "regions" one by one
2210 * and calls the callback function 'fn' for each region.
2211 */
2212
2213struct walk_memory_regions_data
2214{
2215 walk_memory_regions_fn fn;
2216 void *priv;
2217 unsigned long start;
2218 int prot;
2219};
2220
2221static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2222 abi_ulong end, int new_prot)
2223{
2224 if (data->start != -1ul) {
2225 int rc = data->fn(data->priv, data->start, end, data->prot);
2226 if (rc != 0) {
2227 return rc;
2228 }
2229 }
2230
2231 data->start = (new_prot ? end : -1ul);
2232 data->prot = new_prot;
2233
2234 return 0;
2235}
2236
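/* Recursively walk one level of the page-descriptor radix tree, emitting a
   region boundary via walk_memory_regions_end() whenever the protection
   flags change. */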
2237static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2238 abi_ulong base, int level, void **lp)
2239{
2240 abi_ulong pa;
2241 int i, rc;
2242
2243 if (*lp == NULL) {
2244 return walk_memory_regions_end(data, base, 0);
2245 }
2246
2247 if (level == 0) {
2248 PageDesc *pd = *lp;
2249 for (i = 0; i < L2_SIZE; ++i) {
2250 int prot = pd[i].flags;
2251
2252 pa = base | (i << TARGET_PAGE_BITS);
2253 if (prot != data->prot) {
2254 rc = walk_memory_regions_end(data, pa, prot);
2255 if (rc != 0) {
2256 return rc;
2257 }
2258 }
2259 }
2260 } else {
2261 void **pp = *lp;
2262 for (i = 0; i < L2_SIZE; ++i) {
2263 pa = base | ((abi_ulong)i <<
2264 (TARGET_PAGE_BITS + L2_BITS * level));
2265 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2266 if (rc != 0) {
2267 return rc;
2268 }
2269 }
2270 }
2271
2272 return 0;
2273}
2274
2275int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2276{
2277 struct walk_memory_regions_data data;
2278 unsigned long i;
2279
2280 data.fn = fn;
2281 data.priv = priv;
2282 data.start = -1ul;
2283 data.prot = 0;
2284
2285 for (i = 0; i < V_L1_SIZE; i++) {
2286 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2287 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2288 if (rc != 0) {
2289 return rc;
2290 }
2291 }
2292
2293 return walk_memory_regions_end(&data, 0, 0);
2294}
2295
2296static int dump_region(void *priv, abi_ulong start,
2297 abi_ulong end, unsigned long prot)
2298{
2299 FILE *f = (FILE *)priv;
2300
2301 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2302 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2303 start, end, end - start,
2304 ((prot & PAGE_READ) ? 'r' : '-'),
2305 ((prot & PAGE_WRITE) ? 'w' : '-'),
2306 ((prot & PAGE_EXEC) ? 'x' : '-'));
2307
2308 return (0);
2309}
2310
2311/* dump memory mappings */
2312void page_dump(FILE *f)
2313{
2314 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2315 "start", "end", "size", "prot");
2316 walk_memory_regions(f, dump_region);
2317}
2318
2319int page_get_flags(target_ulong address)
2320{
2321 PageDesc *p;
2322
2323 p = page_find(address >> TARGET_PAGE_BITS);
2324 if (!p)
2325 return 0;
2326 return p->flags;
2327}
2328
2329/* Modify the flags of a page and invalidate the code if necessary.
2330 The flag PAGE_WRITE_ORG is positioned automatically depending
2331 on PAGE_WRITE. The mmap_lock should already be held. */
2332void page_set_flags(target_ulong start, target_ulong end, int flags)
2333{
2334 target_ulong addr, len;
2335
2336 /* This function should never be called with addresses outside the
2337 guest address space. If this assert fires, it probably indicates
2338 a missing call to h2g_valid. */
2339#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2340 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2341#endif
2342 assert(start < end);
2343
2344 start = start & TARGET_PAGE_MASK;
2345 end = TARGET_PAGE_ALIGN(end);
2346
2347 if (flags & PAGE_WRITE) {
2348 flags |= PAGE_WRITE_ORG;
2349 }
2350
2351 for (addr = start, len = end - start;
2352 len != 0;
2353 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2354 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2355
2356 /* If the write protection bit is set, then we invalidate
2357 the code inside. */
2358 if (!(p->flags & PAGE_WRITE) &&
2359 (flags & PAGE_WRITE) &&
2360 p->first_tb) {
2361 tb_invalidate_phys_page(addr, 0, NULL);
2362 }
2363 p->flags = flags;
2364 }
2365}
2366
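/* Check the protection flags over [start, start + len); returns 0 if the
   requested access looks permitted and -1 otherwise. Read-only pages that
   merely guard translated code are unprotected as a side effect. */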
2367int page_check_range(target_ulong start, target_ulong len, int flags)
2368{
2369 PageDesc *p;
2370 target_ulong end;
2371 target_ulong addr;
2372
2373 /* This function should never be called with addresses outside the
2374 guest address space. If this assert fires, it probably indicates
2375 a missing call to h2g_valid. */
2376#if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2377 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2378#endif
2379
2380 if (len == 0) {
2381 return 0;
2382 }
2383 if (start + len - 1 < start) {
2384 /* We've wrapped around. */
2385 return -1;
2386 }
2387
2388 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2389 start = start & TARGET_PAGE_MASK;
2390
2391 for (addr = start, len = end - start;
2392 len != 0;
2393 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2394 p = page_find(addr >> TARGET_PAGE_BITS);
2395 if (!p)
2396 return -1;
2397 if (!(p->flags & PAGE_VALID))
2398 return -1;
2399
2400 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2401 return -1;
2402 if (flags & PAGE_WRITE) {
2403 if (!(p->flags & PAGE_WRITE_ORG))
2404 return -1;
2405 /* unprotect the page if it was put read-only because it
2406 contains translated code */
2407 if (!(p->flags & PAGE_WRITE)) {
2408 if (!page_unprotect(addr, 0, NULL))
2409 return -1;
2410 }
2411 return 0;
2412 }
2413 }
2414 return 0;
2415}
2416
2417/* called from signal handler: invalidate the code and unprotect the
2418 page. Return TRUE if the fault was successfully handled. */
2419int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2420{
2421 unsigned int prot;
2422 PageDesc *p;
2423 target_ulong host_start, host_end, addr;
2424
2425 /* Technically this isn't safe inside a signal handler. However we
2426 know this only ever happens in a synchronous SEGV handler, so in
2427 practice it seems to be ok. */
2428 mmap_lock();
2429
2430 p = page_find(address >> TARGET_PAGE_BITS);
2431 if (!p) {
2432 mmap_unlock();
2433 return 0;
2434 }
2435
2436 /* if the page was really writable, then we change its
2437 protection back to writable */
2438 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2439 host_start = address & qemu_host_page_mask;
2440 host_end = host_start + qemu_host_page_size;
2441
2442 prot = 0;
2443 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2444 p = page_find(addr >> TARGET_PAGE_BITS);
2445 p->flags |= PAGE_WRITE;
2446 prot |= p->flags;
2447
2448 /* and since the content will be modified, we must invalidate
2449 the corresponding translated code. */
2450 tb_invalidate_phys_page(addr, pc, puc);
2451#ifdef DEBUG_TB_CHECK
2452 tb_invalidate_check(addr);
2453#endif
2454 }
2455 mprotect((void *)g2h(host_start), qemu_host_page_size,
2456 prot & PAGE_BITS);
2457
2458 mmap_unlock();
2459 return 1;
2460 }
2461 mmap_unlock();
2462 return 0;
2463}
2464
2465static inline void tlb_set_dirty(CPUState *env,
2466 unsigned long addr, target_ulong vaddr)
2467{
2468}
2469#endif /* defined(CONFIG_USER_ONLY) */
2470
2471#if !defined(CONFIG_USER_ONLY)
2472
2473#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2474typedef struct subpage_t {
2475 target_phys_addr_t base;
2476 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2477 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2478} subpage_t;
2479
2480static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2481 ram_addr_t memory, ram_addr_t region_offset);
2482static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2483 ram_addr_t orig_memory,
2484 ram_addr_t region_offset);
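/* Compute the sub-page range [start_addr2, end_addr2] that the
   registration covers within the page at 'addr', and set need_subpage when
   it does not span the whole page. */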
2485#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2486 need_subpage) \
2487 do { \
2488 if (addr > start_addr) \
2489 start_addr2 = 0; \
2490 else { \
2491 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2492 if (start_addr2 > 0) \
2493 need_subpage = 1; \
2494 } \
2495 \
2496 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2497 end_addr2 = TARGET_PAGE_SIZE - 1; \
2498 else { \
2499 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2500 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2501 need_subpage = 1; \
2502 } \
2503 } while (0)
2504
2505/* register physical memory.
2506 For RAM, 'size' must be a multiple of the target page size.
2507 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2508 io memory page. The address used when calling the IO function is
2509 the offset from the start of the region, plus region_offset. Both
2510 start_addr and region_offset are rounded down to a page boundary
2511 before calculating this offset. This should not be a problem unless
2512 the low bits of start_addr and region_offset differ. */
2513void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2514 ram_addr_t size,
2515 ram_addr_t phys_offset,
2516 ram_addr_t region_offset,
2517 bool log_dirty)
2518{
2519 target_phys_addr_t addr, end_addr;
2520 PhysPageDesc *p;
2521 CPUState *env;
2522 ram_addr_t orig_size = size;
2523 subpage_t *subpage;
2524
2525 assert(size);
2526
2527 if (phys_offset == io_mem_unassigned.ram_addr) {
2528 region_offset = start_addr;
2529 }
2530 region_offset &= TARGET_PAGE_MASK;
2531 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2532 end_addr = start_addr + (target_phys_addr_t)size;
2533
2534 addr = start_addr;
2535 do {
2536 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 0);
2537 if (p && p->phys_offset != io_mem_unassigned.ram_addr) {
2538 ram_addr_t orig_memory = p->phys_offset;
2539 target_phys_addr_t start_addr2, end_addr2;
2540 int need_subpage = 0;
2541
2542 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2543 need_subpage);
2544 if (need_subpage) {
2545 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2546 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2547 &p->phys_offset, orig_memory,
2548 p->region_offset);
2549 } else {
2550 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2551 >> IO_MEM_SHIFT];
2552 }
2553 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2554 region_offset);
2555 p->region_offset = 0;
2556 } else {
2557 p->phys_offset = phys_offset;
2558 p->region_offset = region_offset;
2559 if (is_ram_rom_romd(phys_offset))
2560 phys_offset += TARGET_PAGE_SIZE;
2561 }
2562 } else {
2563 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2564 p->phys_offset = phys_offset;
2565 p->region_offset = region_offset;
2566 if (is_ram_rom_romd(phys_offset)) {
2567 phys_offset += TARGET_PAGE_SIZE;
2568 } else {
2569 target_phys_addr_t start_addr2, end_addr2;
2570 int need_subpage = 0;
2571
2572 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2573 end_addr2, need_subpage);
2574
2575 if (need_subpage) {
2576 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2577 &p->phys_offset,
2578 io_mem_unassigned.ram_addr,
2579 addr & TARGET_PAGE_MASK);
2580 subpage_register(subpage, start_addr2, end_addr2,
2581 phys_offset, region_offset);
2582 p->region_offset = 0;
2583 }
2584 }
2585 }
2586 region_offset += TARGET_PAGE_SIZE;
2587 addr += TARGET_PAGE_SIZE;
2588 } while (addr != end_addr);
2589
2590 /* since each CPU stores ram addresses in its TLB cache, we must
2591 reset the modified entries */
2592 /* XXX: slow ! */
2593 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2594 tlb_flush(env, 1);
2595 }
2596}
2597
2598void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2599{
2600 if (kvm_enabled())
2601 kvm_coalesce_mmio_region(addr, size);
2602}
2603
2604void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2605{
2606 if (kvm_enabled())
2607 kvm_uncoalesce_mmio_region(addr, size);
2608}
2609
2610void qemu_flush_coalesced_mmio_buffer(void)
2611{
2612 if (kvm_enabled())
2613 kvm_flush_coalesced_mmio_buffer();
2614}
2615
2616#if defined(__linux__) && !defined(TARGET_S390X)
2617
2618#include <sys/vfs.h>
2619
2620#define HUGETLBFS_MAGIC 0x958458f6
2621
2622static long gethugepagesize(const char *path)
2623{
2624 struct statfs fs;
2625 int ret;
2626
2627 do {
2628 ret = statfs(path, &fs);
2629 } while (ret != 0 && errno == EINTR);
2630
2631 if (ret != 0) {
2632 perror(path);
2633 return 0;
2634 }
2635
2636 if (fs.f_type != HUGETLBFS_MAGIC)
2637 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2638
2639 return fs.f_bsize;
2640}
2641
2642static void *file_ram_alloc(RAMBlock *block,
2643 ram_addr_t memory,
2644 const char *path)
2645{
2646 char *filename;
2647 void *area;
2648 int fd;
2649#ifdef MAP_POPULATE
2650 int flags;
2651#endif
2652 unsigned long hpagesize;
2653
2654 hpagesize = gethugepagesize(path);
2655 if (!hpagesize) {
2656 return NULL;
2657 }
2658
2659 if (memory < hpagesize) {
2660 return NULL;
2661 }
2662
2663 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2664 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2665 return NULL;
2666 }
2667
2668 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2669 return NULL;
2670 }
2671
2672 fd = mkstemp(filename);
2673 if (fd < 0) {
2674 perror("unable to create backing store for hugepages");
2675 free(filename);
2676 return NULL;
2677 }
2678 unlink(filename);
2679 free(filename);
2680
2681 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2682
2683 /*
2684 * ftruncate is not supported by hugetlbfs on older
2685 * hosts, so don't bother bailing out on errors.
2686 * If anything goes wrong with it under other filesystems,
2687 * mmap will fail.
2688 */
2689 if (ftruncate(fd, memory))
2690 perror("ftruncate");
2691
2692#ifdef MAP_POPULATE
2693 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2694 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2695 * to sidestep this quirk.
2696 */
2697 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2698 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2699#else
2700 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2701#endif
2702 if (area == MAP_FAILED) {
2703 perror("file_ram_alloc: can't mmap RAM pages");
2704 close(fd);
2705 return (NULL);
2706 }
2707 block->fd = fd;
2708 return area;
2709}
2710#endif
2711
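/* Best-fit search: return the offset of the smallest gap between existing
   RAM blocks that can hold 'size' bytes. */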
2712static ram_addr_t find_ram_offset(ram_addr_t size)
2713{
2714 RAMBlock *block, *next_block;
2715 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2716
2717 if (QLIST_EMPTY(&ram_list.blocks))
2718 return 0;
2719
2720 QLIST_FOREACH(block, &ram_list.blocks, next) {
2721 ram_addr_t end, next = RAM_ADDR_MAX;
2722
2723 end = block->offset + block->length;
2724
2725 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2726 if (next_block->offset >= end) {
2727 next = MIN(next, next_block->offset);
2728 }
2729 }
2730 if (next - end >= size && next - end < mingap) {
2731 offset = end;
2732 mingap = next - end;
2733 }
2734 }
2735
2736 if (offset == RAM_ADDR_MAX) {
2737 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2738 (uint64_t)size);
2739 abort();
2740 }
2741
2742 return offset;
2743}
2744
2745static ram_addr_t last_ram_offset(void)
2746{
2747 RAMBlock *block;
2748 ram_addr_t last = 0;
2749
2750 QLIST_FOREACH(block, &ram_list.blocks, next)
2751 last = MAX(last, block->offset + block->length);
2752
2753 return last;
2754}
2755
2756void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2757{
2758 RAMBlock *new_block, *block;
2759
2760 new_block = NULL;
2761 QLIST_FOREACH(block, &ram_list.blocks, next) {
2762 if (block->offset == addr) {
2763 new_block = block;
2764 break;
2765 }
2766 }
2767 assert(new_block);
2768 assert(!new_block->idstr[0]);
2769
2770 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2771 char *id = dev->parent_bus->info->get_dev_path(dev);
2772 if (id) {
2773 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2774 g_free(id);
2775 }
2776 }
2777 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2778
2779 QLIST_FOREACH(block, &ram_list.blocks, next) {
2780 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2781 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2782 new_block->idstr);
2783 abort();
2784 }
2785 }
2786}
2787
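/* Allocate (or, if 'host' is non-NULL, adopt) a block of guest RAM of
   'size' bytes, register it in ram_list and return its ram_addr_t offset. */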
2788ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2789 MemoryRegion *mr)
2790{
2791 RAMBlock *new_block;
2792
2793 size = TARGET_PAGE_ALIGN(size);
2794 new_block = g_malloc0(sizeof(*new_block));
2795
2796 new_block->mr = mr;
2797 new_block->offset = find_ram_offset(size);
2798 if (host) {
2799 new_block->host = host;
2800 new_block->flags |= RAM_PREALLOC_MASK;
2801 } else {
2802 if (mem_path) {
2803#if defined (__linux__) && !defined(TARGET_S390X)
2804 new_block->host = file_ram_alloc(new_block, size, mem_path);
2805 if (!new_block->host) {
2806 new_block->host = qemu_vmalloc(size);
2807 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2808 }
2809#else
2810 fprintf(stderr, "-mem-path option unsupported\n");
2811 exit(1);
2812#endif
2813 } else {
2814#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2815 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2816 a system-defined value, which is at least 256GB. Larger systems
2817 have larger values. We put the guest between the end of the data
2818 segment (system break) and this value. We use 32GB as a base to
2819 have enough room for the system break to grow. */
2820 new_block->host = mmap((void*)0x800000000, size,
2821 PROT_EXEC|PROT_READ|PROT_WRITE,
2822 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2823 if (new_block->host == MAP_FAILED) {
2824 fprintf(stderr, "Allocating RAM failed\n");
2825 abort();
2826 }
2827#else
2828 if (xen_enabled()) {
2829 xen_ram_alloc(new_block->offset, size, mr);
2830 } else {
2831 new_block->host = qemu_vmalloc(size);
2832 }
2833#endif
2834 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2835 }
2836 }
2837 new_block->length = size;
2838
2839 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2840
2841 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2842 last_ram_offset() >> TARGET_PAGE_BITS);
2843 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2844 0xff, size >> TARGET_PAGE_BITS);
2845
2846 if (kvm_enabled())
2847 kvm_setup_guest_memory(new_block->host, size);
2848
2849 return new_block->offset;
2850}
2851
2852ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2853{
2854 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2855}
2856
2857void qemu_ram_free_from_ptr(ram_addr_t addr)
2858{
2859 RAMBlock *block;
2860
2861 QLIST_FOREACH(block, &ram_list.blocks, next) {
2862 if (addr == block->offset) {
2863 QLIST_REMOVE(block, next);
2864 g_free(block);
2865 return;
2866 }
2867 }
2868}
2869
2870void qemu_ram_free(ram_addr_t addr)
2871{
2872 RAMBlock *block;
2873
2874 QLIST_FOREACH(block, &ram_list.blocks, next) {
2875 if (addr == block->offset) {
2876 QLIST_REMOVE(block, next);
2877 if (block->flags & RAM_PREALLOC_MASK) {
2878 ;
2879 } else if (mem_path) {
2880#if defined (__linux__) && !defined(TARGET_S390X)
2881 if (block->fd) {
2882 munmap(block->host, block->length);
2883 close(block->fd);
2884 } else {
2885 qemu_vfree(block->host);
2886 }
2887#else
2888 abort();
2889#endif
2890 } else {
2891#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2892 munmap(block->host, block->length);
2893#else
2894 if (xen_enabled()) {
2895 xen_invalidate_map_cache_entry(block->host);
2896 } else {
2897 qemu_vfree(block->host);
2898 }
2899#endif
2900 }
2901 g_free(block);
2902 return;
2903 }
2904 }
2905
2906}
2907
2908#ifndef _WIN32
2909void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2910{
2911 RAMBlock *block;
2912 ram_addr_t offset;
2913 int flags;
2914 void *area, *vaddr;
2915
2916 QLIST_FOREACH(block, &ram_list.blocks, next) {
2917 offset = addr - block->offset;
2918 if (offset < block->length) {
2919 vaddr = block->host + offset;
2920 if (block->flags & RAM_PREALLOC_MASK) {
2921 ;
2922 } else {
2923 flags = MAP_FIXED;
2924 munmap(vaddr, length);
2925 if (mem_path) {
2926#if defined(__linux__) && !defined(TARGET_S390X)
2927 if (block->fd) {
2928#ifdef MAP_POPULATE
2929 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2930 MAP_PRIVATE;
2931#else
2932 flags |= MAP_PRIVATE;
2933#endif
2934 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2935 flags, block->fd, offset);
2936 } else {
2937 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2938 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2939 flags, -1, 0);
2940 }
2941#else
2942 abort();
2943#endif
2944 } else {
2945#if defined(TARGET_S390X) && defined(CONFIG_KVM)
2946 flags |= MAP_SHARED | MAP_ANONYMOUS;
2947 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2948 flags, -1, 0);
2949#else
2950 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2951 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2952 flags, -1, 0);
2953#endif
2954 }
2955 if (area != vaddr) {
2956 fprintf(stderr, "Could not remap addr: "
2957 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2958 length, addr);
2959 exit(1);
2960 }
2961 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
2962 }
2963 return;
2964 }
2965 }
2966}
2967#endif /* !_WIN32 */
2968
2969/* Return a host pointer to ram allocated with qemu_ram_alloc.
2970 With the exception of the softmmu code in this file, this should
2971 only be used for local memory (e.g. video ram) that the device owns,
2972 and knows it isn't going to access beyond the end of the block.
2973
2974 It should not be used for general purpose DMA.
2975 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2976 */
2977void *qemu_get_ram_ptr(ram_addr_t addr)
2978{
2979 RAMBlock *block;
2980
2981 QLIST_FOREACH(block, &ram_list.blocks, next) {
2982 if (addr - block->offset < block->length) {
2983 /* Move this entry to the start of the list. */
2984 if (block != QLIST_FIRST(&ram_list.blocks)) {
2985 QLIST_REMOVE(block, next);
2986 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2987 }
2988 if (xen_enabled()) {
2989 /* We need to check if the requested address is in the RAM
2990 * because we don't want to map the entire memory in QEMU.
2991 * In that case just map until the end of the page.
2992 */
2993 if (block->offset == 0) {
2994 return xen_map_cache(addr, 0, 0);
2995 } else if (block->host == NULL) {
2996 block->host =
2997 xen_map_cache(block->offset, block->length, 1);
2998 }
2999 }
3000 return block->host + (addr - block->offset);
3001 }
3002 }
3003
3004 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3005 abort();
3006
3007 return NULL;
3008}
3009
3010/* Return a host pointer to ram allocated with qemu_ram_alloc.
3011 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3012 */
3013void *qemu_safe_ram_ptr(ram_addr_t addr)
3014{
3015 RAMBlock *block;
3016
3017 QLIST_FOREACH(block, &ram_list.blocks, next) {
3018 if (addr - block->offset < block->length) {
3019 if (xen_enabled()) {
3020 /* We need to check if the requested address is in the RAM
3021 * because we don't want to map the entire memory in QEMU.
3022 * In that case just map until the end of the page.
3023 */
3024 if (block->offset == 0) {
3025 return xen_map_cache(addr, 0, 0);
3026 } else if (block->host == NULL) {
3027 block->host =
3028 xen_map_cache(block->offset, block->length, 1);
3029 }
3030 }
3031 return block->host + (addr - block->offset);
3032 }
3033 }
3034
3035 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3036 abort();
3037
3038 return NULL;
3039}
3040
3041/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3042 * but takes a size argument */
3043void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3044{
3045 if (*size == 0) {
3046 return NULL;
3047 }
3048 if (xen_enabled()) {
3049 return xen_map_cache(addr, *size, 1);
3050 } else {
3051 RAMBlock *block;
3052
3053 QLIST_FOREACH(block, &ram_list.blocks, next) {
3054 if (addr - block->offset < block->length) {
3055 if (addr - block->offset + *size > block->length)
3056 *size = block->length - addr + block->offset;
3057 return block->host + (addr - block->offset);
3058 }
3059 }
3060
3061 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3062 abort();
3063 }
3064}
3065
3066void qemu_put_ram_ptr(void *addr)
3067{
3068 trace_qemu_put_ram_ptr(addr);
3069}
3070
3071int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3072{
3073 RAMBlock *block;
3074 uint8_t *host = ptr;
3075
3076 if (xen_enabled()) {
3077 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3078 return 0;
3079 }
3080
3081 QLIST_FOREACH(block, &ram_list.blocks, next) {
3082 /* This case happens when the block is not mapped. */
3083 if (block->host == NULL) {
3084 continue;
3085 }
3086 if (host - block->host < block->length) {
3087 *ram_addr = block->offset + (host - block->host);
3088 return 0;
3089 }
3090 }
3091
3092 return -1;
3093}
3094
3095/* Some of the softmmu routines need to translate from a host pointer
3096 (typically a TLB entry) back to a ram offset. */
3097ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3098{
3099 ram_addr_t ram_addr;
3100
3101 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3102 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3103 abort();
3104 }
3105 return ram_addr;
3106}
3107
3108static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
3109 unsigned size)
3110{
3111#ifdef DEBUG_UNASSIGNED
3112 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3113#endif
3114#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3115 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
3116#endif
3117 return 0;
3118}
3119
3120static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
3121 uint64_t val, unsigned size)
3122{
3123#ifdef DEBUG_UNASSIGNED
3124 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
3125#endif
3126#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3127 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
3128#endif
3129}
3130
3131static const MemoryRegionOps unassigned_mem_ops = {
3132 .read = unassigned_mem_read,
3133 .write = unassigned_mem_write,
3134 .endianness = DEVICE_NATIVE_ENDIAN,
3135};
3136
3137static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
3138 unsigned size)
3139{
3140 abort();
3141}
3142
3143static void error_mem_write(void *opaque, target_phys_addr_t addr,
3144 uint64_t value, unsigned size)
3145{
3146 abort();
3147}
3148
3149static const MemoryRegionOps error_mem_ops = {
3150 .read = error_mem_read,
3151 .write = error_mem_write,
3152 .endianness = DEVICE_NATIVE_ENDIAN,
3153};
3154
3155static const MemoryRegionOps rom_mem_ops = {
3156 .read = error_mem_read,
3157 .write = unassigned_mem_write,
3158 .endianness = DEVICE_NATIVE_ENDIAN,
3159};
3160
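/* Write handler used for RAM pages whose dirty bits are clear: invalidate
   any translated code on the page, perform the store, then mark the page
   dirty again. */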
3161static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
3162 uint64_t val, unsigned size)
3163{
3164 int dirty_flags;
3165 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3166 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3167#if !defined(CONFIG_USER_ONLY)
3168 tb_invalidate_phys_page_fast(ram_addr, size);
3169 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3170#endif
3171 }
3172 switch (size) {
3173 case 1:
3174 stb_p(qemu_get_ram_ptr(ram_addr), val);
3175 break;
3176 case 2:
3177 stw_p(qemu_get_ram_ptr(ram_addr), val);
3178 break;
3179 case 4:
3180 stl_p(qemu_get_ram_ptr(ram_addr), val);
3181 break;
3182 default:
3183 abort();
3184 }
3185 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3186 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3187 /* we remove the notdirty callback only if the code has been
3188 flushed */
3189 if (dirty_flags == 0xff)
3190 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3191}
3192
3193static const MemoryRegionOps notdirty_mem_ops = {
3194 .read = error_mem_read,
3195 .write = notdirty_mem_write,
3196 .endianness = DEVICE_NATIVE_ENDIAN,
3197};
3198
3199/* Generate a debug exception if a watchpoint has been hit. */
3200static void check_watchpoint(int offset, int len_mask, int flags)
3201{
3202 CPUState *env = cpu_single_env;
3203 target_ulong pc, cs_base;
3204 TranslationBlock *tb;
3205 target_ulong vaddr;
3206 CPUWatchpoint *wp;
3207 int cpu_flags;
3208
3209 if (env->watchpoint_hit) {
3210 /* We re-entered the check after replacing the TB. Now raise
3211 * the debug interrupt so that it will trigger after the
3212 * current instruction. */
3213 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3214 return;
3215 }
3216 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3217 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3218 if ((vaddr == (wp->vaddr & len_mask) ||
3219 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3220 wp->flags |= BP_WATCHPOINT_HIT;
3221 if (!env->watchpoint_hit) {
3222 env->watchpoint_hit = wp;
3223 tb = tb_find_pc(env->mem_io_pc);
3224 if (!tb) {
3225 cpu_abort(env, "check_watchpoint: could not find TB for "
3226 "pc=%p", (void *)env->mem_io_pc);
3227 }
3228 cpu_restore_state(tb, env, env->mem_io_pc);
3229 tb_phys_invalidate(tb, -1);
3230 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3231 env->exception_index = EXCP_DEBUG;
3232 } else {
3233 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3234 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3235 }
3236 cpu_resume_from_signal(env, NULL);
3237 }
3238 } else {
3239 wp->flags &= ~BP_WATCHPOINT_HIT;
3240 }
3241 }
3242}
3243
3244/* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3245 so these check for a hit then pass through to the normal out-of-line
3246 phys routines. */
3247static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3248{
3249 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3250 return ldub_phys(addr);
3251}
3252
3253static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3254{
3255 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3256 return lduw_phys(addr);
3257}
3258
3259static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3260{
3261 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3262 return ldl_phys(addr);
3263}
3264
3265static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3266 uint32_t val)
3267{
3268 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3269 stb_phys(addr, val);
3270}
3271
3272static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3273 uint32_t val)
3274{
3275 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3276 stw_phys(addr, val);
3277}
3278
3279static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3280 uint32_t val)
3281{
3282 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3283 stl_phys(addr, val);
3284}
3285
3286static CPUReadMemoryFunc * const watch_mem_read[3] = {
3287 watch_mem_readb,
3288 watch_mem_readw,
3289 watch_mem_readl,
3290};
3291
3292static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3293 watch_mem_writeb,
3294 watch_mem_writew,
3295 watch_mem_writel,
3296};
3297
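/* Dispatch a sub-page access: look up the I/O handler index and region
   offset registered for this offset within the page, then forward the
   access. 'len' is log2 of the access size in bytes. */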
3298static inline uint32_t subpage_readlen (subpage_t *mmio,
3299 target_phys_addr_t addr,
3300 unsigned int len)
3301{
3302 unsigned int idx = SUBPAGE_IDX(addr);
3303#if defined(DEBUG_SUBPAGE)
3304 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3305 mmio, len, addr, idx);
3306#endif
3307
3308 addr += mmio->region_offset[idx];
3309 idx = mmio->sub_io_index[idx];
3310 return io_mem_read(idx, addr, 1 <<len);
3311}
3312
3313static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3314 uint32_t value, unsigned int len)
3315{
3316 unsigned int idx = SUBPAGE_IDX(addr);
3317#if defined(DEBUG_SUBPAGE)
3318 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3319 __func__, mmio, len, addr, idx, value);
3320#endif
3321
3322 addr += mmio->region_offset[idx];
3323 idx = mmio->sub_io_index[idx];
3324 io_mem_write(idx, addr, value, 1 << len);
3325}
3326
3327static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3328{
3329 return subpage_readlen(opaque, addr, 0);
3330}
3331
3332static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3333 uint32_t value)
3334{
3335 subpage_writelen(opaque, addr, value, 0);
3336}
3337
3338static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3339{
3340 return subpage_readlen(opaque, addr, 1);
3341}
3342
3343static void subpage_writew (void *opaque, target_phys_addr_t addr,
3344 uint32_t value)
3345{
3346 subpage_writelen(opaque, addr, value, 1);
3347}
3348
3349static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3350{
3351 return subpage_readlen(opaque, addr, 2);
3352}
3353
3354static void subpage_writel (void *opaque, target_phys_addr_t addr,
3355 uint32_t value)
3356{
3357 subpage_writelen(opaque, addr, value, 2);
3358}
3359
3360static CPUReadMemoryFunc * const subpage_read[] = {
3361 &subpage_readb,
3362 &subpage_readw,
3363 &subpage_readl,
3364};
3365
3366static CPUWriteMemoryFunc * const subpage_write[] = {
3367 &subpage_writeb,
3368 &subpage_writew,
3369 &subpage_writel,
3370};
3371
3372static uint32_t subpage_ram_readb(void *opaque, target_phys_addr_t addr)
3373{
3374 ram_addr_t raddr = addr;
3375 void *ptr = qemu_get_ram_ptr(raddr);
3376 return ldub_p(ptr);
3377}
3378
3379static void subpage_ram_writeb(void *opaque, target_phys_addr_t addr,
3380 uint32_t value)
3381{
3382 ram_addr_t raddr = addr;
3383 void *ptr = qemu_get_ram_ptr(raddr);
3384 stb_p(ptr, value);
3385}
3386
3387static uint32_t subpage_ram_readw(void *opaque, target_phys_addr_t addr)
3388{
3389 ram_addr_t raddr = addr;
3390 void *ptr = qemu_get_ram_ptr(raddr);
3391 return lduw_p(ptr);
3392}
3393
3394static void subpage_ram_writew(void *opaque, target_phys_addr_t addr,
3395 uint32_t value)
3396{
3397 ram_addr_t raddr = addr;
3398 void *ptr = qemu_get_ram_ptr(raddr);
3399 stw_p(ptr, value);
3400}
3401
3402static uint32_t subpage_ram_readl(void *opaque, target_phys_addr_t addr)
3403{
3404 ram_addr_t raddr = addr;
3405 void *ptr = qemu_get_ram_ptr(raddr);
3406 return ldl_p(ptr);
3407}
3408
3409static void subpage_ram_writel(void *opaque, target_phys_addr_t addr,
3410 uint32_t value)
3411{
3412 ram_addr_t raddr = addr;
3413 void *ptr = qemu_get_ram_ptr(raddr);
3414 stl_p(ptr, value);
3415}
3416
3417static CPUReadMemoryFunc * const subpage_ram_read[] = {
3418 &subpage_ram_readb,
3419 &subpage_ram_readw,
3420 &subpage_ram_readl,
3421};
3422
3423static CPUWriteMemoryFunc * const subpage_ram_write[] = {
3424 &subpage_ram_writeb,
3425 &subpage_ram_writew,
3426 &subpage_ram_writel,
3427};
3428
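/* Fill the sub-page dispatch tables for offsets [start, end] within the
   page with the given I/O handler and region offset. */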
3429static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3430 ram_addr_t memory, ram_addr_t region_offset)
3431{
3432 int idx, eidx;
3433
3434 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3435 return -1;
3436 idx = SUBPAGE_IDX(start);
3437 eidx = SUBPAGE_IDX(end);
3438#if defined(DEBUG_SUBPAGE)
3439 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3440 mmio, start, end, idx, eidx, memory);
3441#endif
3442 if ((memory & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
3443 memory = IO_MEM_SUBPAGE_RAM;
3444 }
3445 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3446 for (; idx <= eidx; idx++) {
3447 mmio->sub_io_index[idx] = memory;
3448 mmio->region_offset[idx] = region_offset;
3449 }
3450
3451 return 0;
3452}
3453
3454static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3455 ram_addr_t orig_memory,
3456 ram_addr_t region_offset)
3457{
3458 subpage_t *mmio;
3459 int subpage_memory;
3460
3461 mmio = g_malloc0(sizeof(subpage_t));
3462
3463 mmio->base = base;
3464 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio);
3465#if defined(DEBUG_SUBPAGE)
3466 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3467 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3468#endif
3469 *phys = subpage_memory | IO_MEM_SUBPAGE;
3470 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3471
3472 return mmio;
3473}
3474
3475static int get_free_io_mem_idx(void)
3476{
3477 int i;
3478
3479 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3480 if (!io_mem_used[i]) {
3481 io_mem_used[i] = 1;
3482 return i;
3483 }
3484 fprintf(stderr, "Ran out of io_mem_idx, max %d!\n", IO_MEM_NB_ENTRIES);
3485 return -1;
3486}
3487
3488 /* mem_read and mem_write are arrays of functions containing the
3489 function to access byte (index 0), word (index 1) and dword (index
3490 2). All three entries must be provided (see the asserts below).
3491 If io_index is greater than zero, the corresponding io zone is
3492 modified; otherwise a new io zone is allocated. The return
3493 value can be used with cpu_register_physical_memory(). -1 is
3494 returned on error. */
3495static int cpu_register_io_memory_fixed(int io_index,
3496 CPUReadMemoryFunc * const *mem_read,
3497 CPUWriteMemoryFunc * const *mem_write,
3498 void *opaque)
3499{
3500 int i;
3501
3502 if (io_index <= 0) {
3503 io_index = get_free_io_mem_idx();
3504 if (io_index == -1)
3505 return io_index;
3506 } else {
3507 io_index >>= IO_MEM_SHIFT;
3508 if (io_index >= IO_MEM_NB_ENTRIES)
3509 return -1;
3510 }
3511
3512 for (i = 0; i < 3; ++i) {
3513 assert(mem_read[i]);
3514 _io_mem_read[io_index][i] = mem_read[i];
3515 }
3516 for (i = 0; i < 3; ++i) {
3517 assert(mem_write[i]);
3518 _io_mem_write[io_index][i] = mem_write[i];
3519 }
3520 io_mem_opaque[io_index] = opaque;
3521
3522 return (io_index << IO_MEM_SHIFT);
3523}
3524
3525int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3526 CPUWriteMemoryFunc * const *mem_write,
3527 void *opaque)
3528{
3529 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque);
3530}
3531
3532void cpu_unregister_io_memory(int io_table_address)
3533{
3534 int i;
3535 int io_index = io_table_address >> IO_MEM_SHIFT;
3536
3537 for (i=0;i < 3; i++) {
3538 _io_mem_read[io_index][i] = NULL;
3539 _io_mem_write[io_index][i] = NULL;
3540 }
3541 io_mem_opaque[io_index] = NULL;
3542 io_mem_used[io_index] = 0;
3543}
3544
3545static void io_mem_init(void)
3546{
3547 int i;
3548
3549 /* Must be first: */
3550 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3551 assert(io_mem_ram.ram_addr == 0);
3552 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3553 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3554 "unassigned", UINT64_MAX);
3555 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3556 "notdirty", UINT64_MAX);
3557 cpu_register_io_memory_fixed(IO_MEM_SUBPAGE_RAM, subpage_ram_read,
3558 subpage_ram_write, NULL);
3559 for (i=0; i<5; i++)
3560 io_mem_used[i] = 1;
3561
3562 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3563 watch_mem_write, NULL);
3564}
3565
3566static void memory_map_init(void)
3567{
3568 system_memory = g_malloc(sizeof(*system_memory));
3569 memory_region_init(system_memory, "system", INT64_MAX);
3570 set_system_memory_map(system_memory);
3571
3572 system_io = g_malloc(sizeof(*system_io));
3573 memory_region_init(system_io, "io", 65536);
3574 set_system_io_map(system_io);
3575}
3576
3577MemoryRegion *get_system_memory(void)
3578{
3579 return system_memory;
3580}
3581
3582MemoryRegion *get_system_io(void)
3583{
3584 return system_io;
3585}
3586
3587#endif /* !defined(CONFIG_USER_ONLY) */
3588
3589/* physical memory access (slow version, mainly for debug) */
3590#if defined(CONFIG_USER_ONLY)
3591int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3592 uint8_t *buf, int len, int is_write)
3593{
3594 int l, flags;
3595 target_ulong page;
3596 void * p;
3597
3598 while (len > 0) {
3599 page = addr & TARGET_PAGE_MASK;
3600 l = (page + TARGET_PAGE_SIZE) - addr;
3601 if (l > len)
3602 l = len;
3603 flags = page_get_flags(page);
3604 if (!(flags & PAGE_VALID))
3605 return -1;
3606 if (is_write) {
3607 if (!(flags & PAGE_WRITE))
3608 return -1;
3609 /* XXX: this code should not depend on lock_user */
3610 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3611 return -1;
3612 memcpy(p, buf, l);
3613 unlock_user(p, addr, l);
3614 } else {
3615 if (!(flags & PAGE_READ))
3616 return -1;
3617 /* XXX: this code should not depend on lock_user */
3618 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3619 return -1;
3620 memcpy(buf, p, l);
3621 unlock_user(p, addr, 0);
3622 }
3623 len -= l;
3624 buf += l;
3625 addr += l;
3626 }
3627 return 0;
3628}
3629
3630#else
3631void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3632 int len, int is_write)
3633{
3634 int l, io_index;
3635 uint8_t *ptr;
3636 uint32_t val;
3637 target_phys_addr_t page;
3638 ram_addr_t pd;
3639 PhysPageDesc p;
3640
3641 while (len > 0) {
3642 page = addr & TARGET_PAGE_MASK;
3643 l = (page + TARGET_PAGE_SIZE) - addr;
3644 if (l > len)
3645 l = len;
3646 p = phys_page_find(page >> TARGET_PAGE_BITS);
3647 pd = p.phys_offset;
3648
3649 if (is_write) {
3650 if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
3651 target_phys_addr_t addr1;
3652 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3653 addr1 = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
3654 /* XXX: could force cpu_single_env to NULL to avoid
3655 potential bugs */
3656 if (l >= 4 && ((addr1 & 3) == 0)) {
3657 /* 32 bit write access */
3658 val = ldl_p(buf);
3659 io_mem_write(io_index, addr1, val, 4);
3660 l = 4;
3661 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3662 /* 16 bit write access */
3663 val = lduw_p(buf);
3664 io_mem_write(io_index, addr1, val, 2);
3665 l = 2;
3666 } else {
3667 /* 8 bit write access */
3668 val = ldub_p(buf);
3669 io_mem_write(io_index, addr1, val, 1);
3670 l = 1;
3671 }
3672 } else {
3673 ram_addr_t addr1;
3674 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3675 /* RAM case */
3676 ptr = qemu_get_ram_ptr(addr1);
3677 memcpy(ptr, buf, l);
3678 if (!cpu_physical_memory_is_dirty(addr1)) {
3679 /* invalidate code */
3680 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3681 /* set dirty bit */
3682 cpu_physical_memory_set_dirty_flags(
3683 addr1, (0xff & ~CODE_DIRTY_FLAG));
3684 }
3685 qemu_put_ram_ptr(ptr);
3686 }
3687 } else {
3688 if (!is_ram_rom_romd(pd)) {
3689 target_phys_addr_t addr1;
3690 /* I/O case */
3691 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3692 addr1 = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
3693 if (l >= 4 && ((addr1 & 3) == 0)) {
3694 /* 32 bit read access */
3695 val = io_mem_read(io_index, addr1, 4);
3696 stl_p(buf, val);
3697 l = 4;
3698 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3699 /* 16 bit read access */
3700 val = io_mem_read(io_index, addr1, 2);
3701 stw_p(buf, val);
3702 l = 2;
3703 } else {
3704 /* 8 bit read access */
3705 val = io_mem_read(io_index, addr1, 1);
3706 stb_p(buf, val);
3707 l = 1;
3708 }
3709 } else {
3710 /* RAM case */
3711 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3712 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3713 qemu_put_ram_ptr(ptr);
3714 }
3715 }
3716 len -= l;
3717 buf += l;
3718 addr += l;
3719 }
3720}
3721
3722/* used for ROM loading : can write in RAM and ROM */
3723void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3724 const uint8_t *buf, int len)
3725{
3726 int l;
3727 uint8_t *ptr;
3728 target_phys_addr_t page;
3729 unsigned long pd;
3730 PhysPageDesc p;
3731
3732 while (len > 0) {
3733 page = addr & TARGET_PAGE_MASK;
3734 l = (page + TARGET_PAGE_SIZE) - addr;
3735 if (l > len)
3736 l = len;
3737 p = phys_page_find(page >> TARGET_PAGE_BITS);
3738 pd = p.phys_offset;
3739
3740 if (!is_ram_rom_romd(pd)) {
3741 /* do nothing */
3742 } else {
3743 unsigned long addr1;
3744 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3745 /* ROM/RAM case */
3746 ptr = qemu_get_ram_ptr(addr1);
3747 memcpy(ptr, buf, l);
3748 qemu_put_ram_ptr(ptr);
3749 }
3750 len -= l;
3751 buf += l;
3752 addr += l;
3753 }
3754}
3755
3756typedef struct {
3757 void *buffer;
3758 target_phys_addr_t addr;
3759 target_phys_addr_t len;
3760} BounceBuffer;
3761
3762static BounceBuffer bounce;
3763
3764typedef struct MapClient {
3765 void *opaque;
3766 void (*callback)(void *opaque);
3767 QLIST_ENTRY(MapClient) link;
3768} MapClient;
3769
3770static QLIST_HEAD(map_client_list, MapClient) map_client_list
3771 = QLIST_HEAD_INITIALIZER(map_client_list);
3772
3773void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3774{
3775 MapClient *client = g_malloc(sizeof(*client));
3776
3777 client->opaque = opaque;
3778 client->callback = callback;
3779 QLIST_INSERT_HEAD(&map_client_list, client, link);
3780 return client;
3781}
3782
3783void cpu_unregister_map_client(void *_client)
3784{
3785 MapClient *client = (MapClient *)_client;
3786
3787 QLIST_REMOVE(client, link);
3788 g_free(client);
3789}
3790
3791static void cpu_notify_map_clients(void)
3792{
3793 MapClient *client;
3794
3795 while (!QLIST_EMPTY(&map_client_list)) {
3796 client = QLIST_FIRST(&map_client_list);
3797 client->callback(client->opaque);
3798 cpu_unregister_map_client(client);
3799 }
3800}
3801
3802/* Map a physical memory region into a host virtual address.
3803 * May map a subset of the requested range, given by and returned in *plen.
3804 * May return NULL if resources needed to perform the mapping are exhausted.
3805 * Use only for reads OR writes - not for read-modify-write operations.
3806 * Use cpu_register_map_client() to know when retrying the map operation is
3807 * likely to succeed.
3808 */
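/* A hypothetical DMA-style write through this API:
 *     target_phys_addr_t len = size;
 *     void *p = cpu_physical_memory_map(gpa, &len, 1);
 *     if (p) {
 *         memcpy(p, data, len);
 *         cpu_physical_memory_unmap(p, len, 1, len);
 *     }
 * where gpa, size and data are caller-supplied values.
 */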
3809void *cpu_physical_memory_map(target_phys_addr_t addr,
3810 target_phys_addr_t *plen,
3811 int is_write)
3812{
3813 target_phys_addr_t len = *plen;
3814 target_phys_addr_t todo = 0;
3815 int l;
3816 target_phys_addr_t page;
3817 unsigned long pd;
3818 PhysPageDesc p;
3819 ram_addr_t raddr = RAM_ADDR_MAX;
3820 ram_addr_t rlen;
3821 void *ret;
3822
3823 while (len > 0) {
3824 page = addr & TARGET_PAGE_MASK;
3825 l = (page + TARGET_PAGE_SIZE) - addr;
3826 if (l > len)
3827 l = len;
3828 p = phys_page_find(page >> TARGET_PAGE_BITS);
3829 pd = p.phys_offset;
3830
3831 if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
3832 if (todo || bounce.buffer) {
3833 break;
3834 }
3835 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3836 bounce.addr = addr;
3837 bounce.len = l;
3838 if (!is_write) {
3839 cpu_physical_memory_read(addr, bounce.buffer, l);
3840 }
3841
3842 *plen = l;
3843 return bounce.buffer;
3844 }
3845 if (!todo) {
3846 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3847 }
3848
3849 len -= l;
3850 addr += l;
3851 todo += l;
3852 }
3853 rlen = todo;
3854 ret = qemu_ram_ptr_length(raddr, &rlen);
3855 *plen = rlen;
3856 return ret;
3857}
3858
3859/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
3860 * Will also mark the memory as dirty if is_write == 1. access_len gives
3861 * the amount of memory that was actually read or written by the caller.
3862 */
3863void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3864 int is_write, target_phys_addr_t access_len)
3865{
3866 if (buffer != bounce.buffer) {
3867 if (is_write) {
3868 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3869 while (access_len) {
3870 unsigned l;
3871 l = TARGET_PAGE_SIZE;
3872 if (l > access_len)
3873 l = access_len;
3874 if (!cpu_physical_memory_is_dirty(addr1)) {
3875 /* invalidate code */
3876 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3877 /* set dirty bit */
3878 cpu_physical_memory_set_dirty_flags(
3879 addr1, (0xff & ~CODE_DIRTY_FLAG));
3880 }
3881 addr1 += l;
3882 access_len -= l;
3883 }
3884 }
3885 if (xen_enabled()) {
3886 xen_invalidate_map_cache_entry(buffer);
3887 }
3888 return;
3889 }
3890 if (is_write) {
3891 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
3892 }
3893 qemu_vfree(bounce.buffer);
3894 bounce.buffer = NULL;
3895 cpu_notify_map_clients();
3896}
3897
3898/* warning: addr must be aligned */
3899static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
3900 enum device_endian endian)
3901{
3902 int io_index;
3903 uint8_t *ptr;
3904 uint32_t val;
3905 unsigned long pd;
3906 PhysPageDesc p;
3907
3908 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3909 pd = p.phys_offset;
3910
3911 if (!is_ram_rom_romd(pd)) {
3912 /* I/O case */
3913 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3914 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
3915 val = io_mem_read(io_index, addr, 4);
3916#if defined(TARGET_WORDS_BIGENDIAN)
3917 if (endian == DEVICE_LITTLE_ENDIAN) {
3918 val = bswap32(val);
3919 }
3920#else
3921 if (endian == DEVICE_BIG_ENDIAN) {
3922 val = bswap32(val);
3923 }
3924#endif
3925 } else {
3926 /* RAM case */
3927 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3928 (addr & ~TARGET_PAGE_MASK);
3929 switch (endian) {
3930 case DEVICE_LITTLE_ENDIAN:
3931 val = ldl_le_p(ptr);
3932 break;
3933 case DEVICE_BIG_ENDIAN:
3934 val = ldl_be_p(ptr);
3935 break;
3936 default:
3937 val = ldl_p(ptr);
3938 break;
3939 }
3940 }
3941 return val;
3942}
3943
3944uint32_t ldl_phys(target_phys_addr_t addr)
3945{
3946 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3947}
3948
3949uint32_t ldl_le_phys(target_phys_addr_t addr)
3950{
3951 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3952}
3953
3954uint32_t ldl_be_phys(target_phys_addr_t addr)
3955{
3956 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3957}
3958
3959/* warning: addr must be aligned */
3960static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
3961 enum device_endian endian)
3962{
3963 int io_index;
3964 uint8_t *ptr;
3965 uint64_t val;
3966 unsigned long pd;
3967 PhysPageDesc p;
3968
3969 p = phys_page_find(addr >> TARGET_PAGE_BITS);
3970 pd = p.phys_offset;
3971
3972 if (!is_ram_rom_romd(pd)) {
3973 /* I/O case */
3974 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3975 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
3976
3977 /* XXX This is broken when device endian != cpu endian.
3978 Fix and add "endian" variable check */
3979#ifdef TARGET_WORDS_BIGENDIAN
3980 val = io_mem_read(io_index, addr, 4) << 32;
3981 val |= io_mem_read(io_index, addr + 4, 4);
3982#else
3983 val = io_mem_read(io_index, addr, 4);
3984 val |= io_mem_read(io_index, addr + 4, 4) << 32;
3985#endif
3986 } else {
3987 /* RAM case */
3988 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
3989 (addr & ~TARGET_PAGE_MASK);
3990 switch (endian) {
3991 case DEVICE_LITTLE_ENDIAN:
3992 val = ldq_le_p(ptr);
3993 break;
3994 case DEVICE_BIG_ENDIAN:
3995 val = ldq_be_p(ptr);
3996 break;
3997 default:
3998 val = ldq_p(ptr);
3999 break;
4000 }
4001 }
4002 return val;
4003}
4004
4005uint64_t ldq_phys(target_phys_addr_t addr)
4006{
4007 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4008}
4009
4010uint64_t ldq_le_phys(target_phys_addr_t addr)
4011{
4012 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4013}
4014
4015uint64_t ldq_be_phys(target_phys_addr_t addr)
4016{
4017 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4018}
4019
4020/* XXX: optimize */
4021uint32_t ldub_phys(target_phys_addr_t addr)
4022{
4023 uint8_t val;
4024 cpu_physical_memory_read(addr, &val, 1);
4025 return val;
4026}
4027
4028/* warning: addr must be aligned */
4029static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4030 enum device_endian endian)
4031{
4032 int io_index;
4033 uint8_t *ptr;
4034 uint64_t val;
4035 unsigned long pd;
4036 PhysPageDesc p;
4037
4038 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4039 pd = p.phys_offset;
4040
4041 if (!is_ram_rom_romd(pd)) {
4042 /* I/O case */
4043 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4044 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
4045 val = io_mem_read(io_index, addr, 2);
4046#if defined(TARGET_WORDS_BIGENDIAN)
4047 if (endian == DEVICE_LITTLE_ENDIAN) {
4048 val = bswap16(val);
4049 }
4050#else
4051 if (endian == DEVICE_BIG_ENDIAN) {
4052 val = bswap16(val);
4053 }
4054#endif
4055 } else {
4056 /* RAM case */
4057 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4058 (addr & ~TARGET_PAGE_MASK);
4059 switch (endian) {
4060 case DEVICE_LITTLE_ENDIAN:
4061 val = lduw_le_p(ptr);
4062 break;
4063 case DEVICE_BIG_ENDIAN:
4064 val = lduw_be_p(ptr);
4065 break;
4066 default:
4067 val = lduw_p(ptr);
4068 break;
4069 }
4070 }
4071 return val;
4072}
4073
4074uint32_t lduw_phys(target_phys_addr_t addr)
4075{
4076 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4077}
4078
4079uint32_t lduw_le_phys(target_phys_addr_t addr)
4080{
4081 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4082}
4083
4084uint32_t lduw_be_phys(target_phys_addr_t addr)
4085{
4086 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4087}
4088
4089/* warning: addr must be aligned. The ram page is not marked as dirty
4090 and the code inside is not invalidated. This is useful if the dirty
4091 bits are used to track modified PTEs; see the example after this function. */
4092void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4093{
4094 int io_index;
4095 uint8_t *ptr;
4096 unsigned long pd;
4097 PhysPageDesc p;
4098
4099 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4100 pd = p.phys_offset;
4101
4102 if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
4103 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4104 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
4105 io_mem_write(io_index, addr, val, 4);
4106 } else {
4107 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4108 ptr = qemu_get_ram_ptr(addr1);
4109 stl_p(ptr, val);
4110
4111 if (unlikely(in_migration)) {
4112 if (!cpu_physical_memory_is_dirty(addr1)) {
4113 /* invalidate code */
4114 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4115 /* set dirty bit */
4116 cpu_physical_memory_set_dirty_flags(
4117 addr1, (0xff & ~CODE_DIRTY_FLAG));
4118 }
4119 }
4120 }
4121}
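
/* Example (illustrative sketch): how a target MMU helper might set an
 "accessed" bit in a guest page table entry. The 0x20 flag value and
 the helper name are hypothetical; the point is that the PTE is
 rewritten with stl_phys_notdirty() above, so the write neither marks
 the RAM page dirty nor invalidates translated code located on it. */
#if 0
static void example_set_pte_accessed(target_phys_addr_t pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    if (!(pte & 0x20)) {
        stl_phys_notdirty(pte_addr, pte | 0x20);
    }
}
#endif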
4122
4123void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4124{
4125 int io_index;
4126 uint8_t *ptr;
4127 unsigned long pd;
4128 PhysPageDesc p;
4129
4130 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4131 pd = p.phys_offset;
4132
4133 if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
4134 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4135 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
4136#ifdef TARGET_WORDS_BIGENDIAN
4137 io_mem_write(io_index, addr, val >> 32, 4);
4138 io_mem_write(io_index, addr + 4, (uint32_t)val, 4);
4139#else
4140 io_mem_write(io_index, addr, (uint32_t)val, 4);
4141 io_mem_write(io_index, addr + 4, val >> 32, 4);
4142#endif
4143 } else {
4144 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4145 (addr & ~TARGET_PAGE_MASK);
4146 stq_p(ptr, val);
4147 }
4148}
4149
4150/* warning: addr must be aligned */
4151static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4152 enum device_endian endian)
4153{
4154 int io_index;
4155 uint8_t *ptr;
4156 unsigned long pd;
4157 PhysPageDesc p;
4158
4159 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4160 pd = p.phys_offset;
4161
4162 if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
4163 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4164 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
4165#if defined(TARGET_WORDS_BIGENDIAN)
4166 if (endian == DEVICE_LITTLE_ENDIAN) {
4167 val = bswap32(val);
4168 }
4169#else
4170 if (endian == DEVICE_BIG_ENDIAN) {
4171 val = bswap32(val);
4172 }
4173#endif
4174 io_mem_write(io_index, addr, val, 4);
4175 } else {
4176 unsigned long addr1;
4177 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4178 /* RAM case */
4179 ptr = qemu_get_ram_ptr(addr1);
4180 switch (endian) {
4181 case DEVICE_LITTLE_ENDIAN:
4182 stl_le_p(ptr, val);
4183 break;
4184 case DEVICE_BIG_ENDIAN:
4185 stl_be_p(ptr, val);
4186 break;
4187 default:
4188 stl_p(ptr, val);
4189 break;
4190 }
4191 if (!cpu_physical_memory_is_dirty(addr1)) {
4192 /* invalidate code */
4193 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4194 /* set dirty bit */
4195 cpu_physical_memory_set_dirty_flags(addr1,
4196 (0xff & ~CODE_DIRTY_FLAG));
4197 }
4198 }
4199}
4200
4201void stl_phys(target_phys_addr_t addr, uint32_t val)
4202{
4203 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4204}
4205
4206void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4207{
4208 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4209}
4210
4211void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4212{
4213 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4214}
4215
4216/* XXX: optimize */
4217void stb_phys(target_phys_addr_t addr, uint32_t val)
4218{
4219 uint8_t v = val;
4220 cpu_physical_memory_write(addr, &v, 1);
4221}
4222
4223/* warning: addr must be aligned */
4224static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4225 enum device_endian endian)
4226{
4227 int io_index;
4228 uint8_t *ptr;
4229 unsigned long pd;
4230 PhysPageDesc p;
4231
4232 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4233 pd = p.phys_offset;
4234
4235 if ((pd & ~TARGET_PAGE_MASK) != io_mem_ram.ram_addr) {
4236 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4237 addr = (addr & ~TARGET_PAGE_MASK) + p.region_offset;
4238#if defined(TARGET_WORDS_BIGENDIAN)
4239 if (endian == DEVICE_LITTLE_ENDIAN) {
4240 val = bswap16(val);
4241 }
4242#else
4243 if (endian == DEVICE_BIG_ENDIAN) {
4244 val = bswap16(val);
4245 }
4246#endif
4247 io_mem_write(io_index, addr, val, 2);
4248 } else {
4249 unsigned long addr1;
4250 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4251 /* RAM case */
4252 ptr = qemu_get_ram_ptr(addr1);
4253 switch (endian) {
4254 case DEVICE_LITTLE_ENDIAN:
4255 stw_le_p(ptr, val);
4256 break;
4257 case DEVICE_BIG_ENDIAN:
4258 stw_be_p(ptr, val);
4259 break;
4260 default:
4261 stw_p(ptr, val);
4262 break;
4263 }
4264 if (!cpu_physical_memory_is_dirty(addr1)) {
4265 /* invalidate code */
4266 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4267 /* set dirty bit */
4268 cpu_physical_memory_set_dirty_flags(addr1,
4269 (0xff & ~CODE_DIRTY_FLAG));
4270 }
4271 }
4272}
4273
4274void stw_phys(target_phys_addr_t addr, uint32_t val)
4275{
4276 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4277}
4278
4279void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4280{
4281 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4282}
4283
4284void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4285{
4286 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4287}
4288
4289/* XXX: optimize */
4290void stq_phys(target_phys_addr_t addr, uint64_t val)
4291{
4292 val = tswap64(val);
4293 cpu_physical_memory_write(addr, &val, 8);
4294}
4295
4296void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4297{
4298 val = cpu_to_le64(val);
4299 cpu_physical_memory_write(addr, &val, 8);
4300}
4301
4302void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4303{
4304 val = cpu_to_be64(val);
4305 cpu_physical_memory_write(addr, &val, 8);
4306}
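
/* Example (illustrative sketch): publishing a 64-bit guest-physical
 pointer into a little-endian shared structure, e.g. a hypothetical
 ring-buffer "head" field at offset 0x8. stq_le_phys() stores the
 value little-endian on both big- and little-endian targets, so the
 guest always sees the same layout. */
#if 0
static void example_publish_ring_head(target_phys_addr_t ring, uint64_t head)
{
    stq_le_phys(ring + 0x8, head);   /* hypothetical "head" field */
}
#endif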
4307
4308/* virtual memory access for debug (includes writing to ROM) */
4309int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4310 uint8_t *buf, int len, int is_write)
4311{
4312 int l;
4313 target_phys_addr_t phys_addr;
4314 target_ulong page;
4315
4316 while (len > 0) {
4317 page = addr & TARGET_PAGE_MASK;
4318 phys_addr = cpu_get_phys_page_debug(env, page);
4319 /* if no physical page mapped, return an error */
4320 if (phys_addr == -1)
4321 return -1;
4322 l = (page + TARGET_PAGE_SIZE) - addr;
4323 if (l > len)
4324 l = len;
4325 phys_addr += (addr & ~TARGET_PAGE_MASK);
4326 if (is_write)
4327 cpu_physical_memory_write_rom(phys_addr, buf, l);
4328 else
4329 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4330 len -= l;
4331 buf += l;
4332 addr += l;
4333 }
4334 return 0;
4335}
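
/* Example (illustrative sketch): how a debug front end such as a gdb
 stub might read guest virtual memory through the helper above. The
 wrapper itself is hypothetical; cpu_memory_rw_debug() is the real
 entry point and fails if no physical page is mapped at the address. */
#if 0
static int example_peek_guest_u32(CPUState *env, target_ulong vaddr,
                                  uint32_t *out)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(env, vaddr, buf, sizeof(buf), 0) < 0) {
        return -1;                   /* unmapped guest virtual address */
    }
    *out = ldl_p(buf);               /* interpret in target byte order */
    return 0;
}
#endif
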
4336#endif
4337
4338/* In deterministic execution mode, instructions doing device I/O
4339 must be at the end of the TB */
4340void cpu_io_recompile(CPUState *env, void *retaddr)
4341{
4342 TranslationBlock *tb;
4343 uint32_t n, cflags;
4344 target_ulong pc, cs_base;
4345 uint64_t flags;
4346
4347 tb = tb_find_pc((unsigned long)retaddr);
4348 if (!tb) {
4349 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4350 retaddr);
4351 }
4352 n = env->icount_decr.u16.low + tb->icount;
4353 cpu_restore_state(tb, env, (unsigned long)retaddr);
4354 /* Calculate how many instructions had been executed before the fault
4355 occurred. */
4356 n = n - env->icount_decr.u16.low;
4357 /* Generate a new TB ending on the I/O insn. */
4358 n++;
4359 /* On MIPS and SH, delay slot instructions can only be restarted if
4360 they were already the first instruction in the TB. If this is not
4361 the first instruction in a TB then re-execute the preceding
4362 branch. */
4363#if defined(TARGET_MIPS)
4364 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4365 env->active_tc.PC -= 4;
4366 env->icount_decr.u16.low++;
4367 env->hflags &= ~MIPS_HFLAG_BMASK;
4368 }
4369#elif defined(TARGET_SH4)
4370 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4371 && n > 1) {
4372 env->pc -= 2;
4373 env->icount_decr.u16.low++;
4374 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4375 }
4376#endif
4377 /* This should never happen. */
4378 if (n > CF_COUNT_MASK)
4379 cpu_abort(env, "TB too big during recompile");
4380
4381 cflags = n | CF_LAST_IO;
4382 pc = tb->pc;
4383 cs_base = tb->cs_base;
4384 flags = tb->flags;
4385 tb_phys_invalidate(tb, -1);
4386 /* FIXME: In theory this could raise an exception. In practice
4387 we have already translated the block once so it's probably ok. */
4388 tb_gen_code(env, pc, cs_base, flags, cflags);
4389 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4390 the first in the TB) then we end up generating a whole new TB and
4391 repeating the fault, which is horribly inefficient.
4392 Better would be to execute just this insn uncached, or generate a
4393 second new TB. */
4394 cpu_resume_from_signal(env, NULL);
4395}
4396
4397#if !defined(CONFIG_USER_ONLY)
4398
4399void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4400{
4401 int i, target_code_size, max_target_code_size;
4402 int direct_jmp_count, direct_jmp2_count, cross_page;
4403 TranslationBlock *tb;
4404
4405 target_code_size = 0;
4406 max_target_code_size = 0;
4407 cross_page = 0;
4408 direct_jmp_count = 0;
4409 direct_jmp2_count = 0;
4410 for(i = 0; i < nb_tbs; i++) {
4411 tb = &tbs[i];
4412 target_code_size += tb->size;
4413 if (tb->size > max_target_code_size)
4414 max_target_code_size = tb->size;
4415 if (tb->page_addr[1] != -1)
4416 cross_page++;
4417 if (tb->tb_next_offset[0] != 0xffff) {
4418 direct_jmp_count++;
4419 if (tb->tb_next_offset[1] != 0xffff) {
4420 direct_jmp2_count++;
4421 }
4422 }
4423 }
4424 /* XXX: avoid using doubles? */
4425 cpu_fprintf(f, "Translation buffer state:\n");
4426 cpu_fprintf(f, "gen code size %td/%ld\n",
4427 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4428 cpu_fprintf(f, "TB count %d/%d\n",
4429 nb_tbs, code_gen_max_blocks);
4430 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4431 nb_tbs ? target_code_size / nb_tbs : 0,
4432 max_target_code_size);
4433 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4434 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4435 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4436 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4437 cross_page,
4438 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4439 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4440 direct_jmp_count,
4441 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4442 direct_jmp2_count,
4443 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4444 cpu_fprintf(f, "\nStatistics:\n");
4445 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4446 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4447 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4448 tcg_dump_info(f, cpu_fprintf);
4449}
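
/* Example (illustrative sketch): dumping the translation statistics
 above directly to stderr. In QEMU this function is normally reached
 from the monitor; the stderr/fprintf pairing is only a hypothetical
 stand-alone use. */
#if 0
static void example_dump_jit_stats(void)
{
    dump_exec_info(stderr, fprintf);
}
#endif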
4450
4451/* NOTE: this function can trigger an exception */
4452/* NOTE2: the returned address is not exactly the physical address: it
4453 is the corresponding ram_addr_t offset within guest RAM */
4454tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr)
4455{
4456 int mmu_idx, page_index, pd;
4457 void *p;
4458
4459 page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
4460 mmu_idx = cpu_mmu_index(env1);
4461 if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
4462 (addr & TARGET_PAGE_MASK))) {
4463 ldub_code(addr);
4464 }
4465 pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
4466 if (pd != io_mem_ram.ram_addr && pd != io_mem_rom.ram_addr
4467 && !(pd & IO_MEM_ROMD)) {
4468#if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
4469 cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
4470#else
4471 cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr);
4472#endif
4473 }
4474 p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
4475 return qemu_ram_addr_from_host_nofail(p);
4476}
4477
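/* The four includes below instantiate the softmmu code-fetch helpers
 (MMUSUFFIX "_cmmu"): each SHIFT value generates the loader for a
 1 << SHIFT byte access (1, 2, 4 and 8 bytes). GETPC() is NULL and
 "env" is redirected to cpu_single_env because these helpers are not
 invoked from generated code. */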
4478#define MMUSUFFIX _cmmu
4479#undef GETPC
4480#define GETPC() NULL
4481#define env cpu_single_env
4482#define SOFTMMU_CODE_ACCESS
4483
4484#define SHIFT 0
4485#include "softmmu_template.h"
4486
4487#define SHIFT 1
4488#include "softmmu_template.h"
4489
4490#define SHIFT 2
4491#include "softmmu_template.h"
4492
4493#define SHIFT 3
4494#include "softmmu_template.h"
4495
4496#undef env
4497
4498#endif