X-Git-Url: https://git.proxmox.com/?a=blobdiff_plain;f=translate-all.c;h=b3ee876526a814458ae6293f46c618df27e2ae41;hb=8251a72f8bb8764c13dd5ba627e7d9e6d35d188e;hp=b6663dc91d66570aa0558a103b44cbfa0bd6e7cf;hpb=2e1ae44a4f4a6149fbb9dc812243522f07284700;p=mirror_qemu.git

diff --git a/translate-all.c b/translate-all.c
index b6663dc91d..b3ee876526 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -25,12 +25,13 @@
#include "qemu-common.h"
#define NO_CPU_IO_DEFS
#include "cpu.h"
-#include "trace.h"
+#include "trace-root.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
+#include "exec/exec-all.h"
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
@@ -54,18 +55,32 @@
#include "translate-all.h"
#include "qemu/bitmap.h"
#include "qemu/timer.h"
+#include "qemu/main-loop.h"
#include "exec/log.h"
+#include "sysemu/cpus.h"

-//#define DEBUG_TB_INVALIDATE
-//#define DEBUG_FLUSH
+/* #define DEBUG_TB_INVALIDATE */
+/* #define DEBUG_TB_FLUSH */

/* make various TB consistency checks */
-//#define DEBUG_TB_CHECK
+/* #define DEBUG_TB_CHECK */

#if !defined(CONFIG_USER_ONLY)
/* TB consistency checks only implemented for usermode emulation. */
#undef DEBUG_TB_CHECK
#endif

+/* Access to the various translation structures needs to be serialised via locks
+ * for consistency. This is automatic for SoftMMU based system
+ * emulation due to its single-threaded nature. In user-mode emulation,
+ * access to the memory-related structures is protected with the
+ * mmap_lock.
+ */
+#ifdef CONFIG_SOFTMMU
+#define assert_memory_lock() tcg_debug_assert(have_tb_lock)
+#else
+#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
+#endif
+
#define SMC_BITMAP_USE_THRESHOLD 10

typedef struct PageDesc {
@@ -97,60 +112,75 @@ typedef struct PageDesc {
#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)

-/* The bits remaining after N lower levels of page tables. */
-#define V_L1_BITS_REM \
-    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS)
-
-#if V_L1_BITS_REM < 4
-#define V_L1_BITS (V_L1_BITS_REM + V_L2_BITS)
-#else
-#define V_L1_BITS V_L1_BITS_REM
-#endif
-
-#define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
-
-#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
-
uintptr_t qemu_host_page_size;
intptr_t qemu_host_page_mask;

-/* The bottom level has pointers to PageDesc */
-static void *l1_map[V_L1_SIZE];
+/*
+ * L1 Mapping properties
+ */
+static int v_l1_size;
+static int v_l1_shift;
+static int v_l2_levels;
+
+/* The bottom level has pointers to PageDesc, and is indexed by
+ * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
+ */
+#define V_L1_MIN_BITS 4
+#define V_L1_MAX_BITS (V_L2_BITS + 3)
+#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
+
+static void *l1_map[V_L1_MAX_SIZE];

/* code generation context */
TCGContext tcg_ctx;
+bool parallel_cpus;

/* translation block context */
-#ifdef CONFIG_USER_ONLY
__thread int have_tb_lock;
-#endif
+
+static void page_table_config_init(void)
+{
+    uint32_t v_l1_bits;
+
+    assert(TARGET_PAGE_BITS);
+    /* The bits remaining after N lower levels of page tables.  */
+    v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
+    if (v_l1_bits < V_L1_MIN_BITS) {
+        v_l1_bits += V_L2_BITS;
+    }
+
+    v_l1_size = 1 << v_l1_bits;
+    v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
+    v_l2_levels = v_l1_shift / V_L2_BITS - 1;
+
+    assert(v_l1_bits <= V_L1_MAX_BITS);
+    assert(v_l1_shift % V_L2_BITS == 0);
+    assert(v_l2_levels >= 0);
+}
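To make the new split concrete: with a hypothetical 48-bit guest address space, 12-bit target pages and the V_L2_BITS of 10 defined above, the computation lands on a 6-bit level 1 with two intermediate levels below it. A standalone sketch of the same arithmetic (the constants are illustrative assumptions, not values from this patch):

    #include <assert.h>
    #include <stdio.h>

    /* Illustrative constants; in QEMU they come from the target configuration. */
    #define L1_MAP_ADDR_SPACE_BITS 48
    #define TARGET_PAGE_BITS       12
    #define V_L2_BITS              10
    #define V_L1_MIN_BITS          4

    int main(void)
    {
        /* 48 - 12 = 36 bits to translate; 36 % 10 leaves 6 bits for level 1. */
        int v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
        if (v_l1_bits < V_L1_MIN_BITS) {
            v_l1_bits += V_L2_BITS;   /* widen level 1 rather than leave it tiny */
        }

        int v_l1_size   = 1 << v_l1_bits;
        int v_l1_shift  = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
        int v_l2_levels = v_l1_shift / V_L2_BITS - 1;

        assert(v_l1_shift % V_L2_BITS == 0);
        /* Prints: v_l1_bits=6 v_l1_size=64 v_l2_levels=2 */
        printf("v_l1_bits=%d v_l1_size=%d v_l2_levels=%d\n",
               v_l1_bits, v_l1_size, v_l2_levels);
        return 0;
    }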
+
+#define assert_tb_locked() tcg_debug_assert(have_tb_lock)
+#define assert_tb_unlocked() tcg_debug_assert(!have_tb_lock)

void tb_lock(void)
{
-#ifdef CONFIG_USER_ONLY
-    assert(!have_tb_lock);
+    assert_tb_unlocked();
    qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
    have_tb_lock++;
-#endif
}

void tb_unlock(void)
{
-#ifdef CONFIG_USER_ONLY
-    assert(have_tb_lock);
+    assert_tb_locked();
    have_tb_lock--;
    qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
-#endif
}

void tb_lock_reset(void)
{
-#ifdef CONFIG_USER_ONLY
    if (have_tb_lock) {
        qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
        have_tb_lock = 0;
    }
-#endif
}

static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
@@ -247,7 +277,9 @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
    return p - block;
}

-/* The cpu state corresponding to 'searched_pc' is restored. */
+/* The cpu state corresponding to 'searched_pc' is restored.
+ * Called with tb_lock held.
+ */
static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                     uintptr_t searched_pc)
{
@@ -260,6 +292,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
    int64_t ti = profile_getclock();
#endif

+    searched_pc -= GETPC_ADJ;
+
    if (searched_pc < host_pc) {
        return -1;
    }
@@ -298,7 +332,22 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;
+    bool r = false;
+
+    /* A retaddr of zero is invalid, so we really shouldn't have ended
+     * up here. The target code has likely forgotten to check retaddr
+     * != 0 before attempting to restore state. We return early to
+     * avoid blowing up on a recursive tb_lock(). The target must have
+     * previously survived a failed cpu_restore_state because
+     * tb_find_pc(0) would have failed anyway. It still should be
+     * fixed though.
+     */
+
+    if (!retaddr) {
+        return r;
+    }
+
+    tb_lock();
    tb = tb_find_pc(retaddr);
    if (tb) {
        cpu_restore_state_from_tb(cpu, tb, retaddr);
@@ -307,9 +356,11 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
            tb_phys_invalidate(tb, -1);
            tb_free(tb);
        }
-        return true;
+        r = true;
    }
-    return false;
+    tb_unlock();
+
+    return r;
}
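For context on how the retaddr plumbing is used: a softmmu helper that needs precise guest state passes its own host return address down, so that tb_find_pc() can locate the translation block it was called from. A hypothetical helper, for illustration only (GETPC() and ENV_GET_CPU() are the conventions of this era of QEMU, not part of this patch):

    /* Illustration only: raise an alignment fault with a resynchronised
     * guest PC. This helper does not exist in the patch.
     */
    void helper_check_align(CPUArchState *env, target_ulong addr)
    {
        if (addr & 3) {
            CPUState *cs = ENV_GET_CPU(env);

            /* GETPC() is the host return address into generated code;
             * cpu_restore_state() walks back to the TB containing it and
             * replays the guest PC/flags at the faulting instruction.
             */
            cpu_restore_state(cs, GETPC());
            cpu_loop_exit(cs);
        }
    }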

void page_size_init(void)
{
@@ -330,6 +381,8 @@ void page_size_init(void)
static void page_init(void)
{
    page_size_init();
+    page_table_config_init();
+
#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
{
#ifdef HAVE_KINFO_GETVMMAP
@@ -397,6 +450,7 @@ static void page_init(void)
}

/* If alloc=1:
+ * Called with tb_lock held for system emulation.
 * Called with mmap_lock held for user-mode emulation.
 */
static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
@@ -405,11 +459,15 @@ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
    void **lp;
    int i;

+    if (alloc) {
+        assert_memory_lock();
+    }
+
    /* Level 1. Always allocated. */
-    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
+    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));

    /* Level 2..N-1.  */
-    for (i = V_L1_SHIFT / V_L2_BITS - 1; i > 0; i--) {
+    for (i = v_l2_levels; i > 0; i--) {
        void **p = atomic_rcu_read(lp);

        if (p == NULL) {
@@ -677,7 +735,7 @@ static inline void *alloc_code_gen_buffer(void)
        size_t size2;
        void *buf2 = mmap(NULL, size + qemu_real_host_page_size,
                          PROT_NONE, flags, -1, 0);
-        switch (buf2 != MAP_FAILED) {
+        switch ((int)(buf2 != MAP_FAILED)) {
        case 1:
            if (!cross_256mb(buf2, size)) {
                /* Success! Use the new buffer. */
@@ -761,12 +819,18 @@ bool tcg_enabled(void)
    return tcg_ctx.code_gen_buffer != NULL;
}

-/* Allocate a new translation block. Flush the translation buffer if
-   too many translation blocks or too much generated code. */
+/*
+ * Allocate a new translation block. Flush the translation buffer if
+ * too many translation blocks or too much generated code.
+ *
+ * Called with tb_lock held.
+ */
static TranslationBlock *tb_alloc(target_ulong pc)
{
    TranslationBlock *tb;

+    assert_tb_locked();
+
    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) {
        return NULL;
    }
@@ -777,8 +841,11 @@ static TranslationBlock *tb_alloc(target_ulong pc)
    return tb;
}

+/* Called with tb_lock held. */
void tb_free(TranslationBlock *tb)
{
+    assert_tb_locked();
+
    /* In practice this is mostly used for single-use temporary TBs.
       Ignore the hard cases and just back up if this TB happens to
       be the last one generated. */
@@ -824,21 +891,26 @@ static void page_flush_tb_1(int level, void **lp)

static void page_flush_tb(void)
{
-    int i;
+    int i, l1_sz = v_l1_size;

-    for (i = 0; i < V_L1_SIZE; i++) {
-        page_flush_tb_1(V_L1_SHIFT / V_L2_BITS - 1, l1_map + i);
+    for (i = 0; i < l1_sz; i++) {
+        page_flush_tb_1(v_l2_levels, l1_map + i);
    }
}

/* flush all the translation blocks */
-/* XXX: tb_flush is currently not thread safe */
-void tb_flush(CPUState *cpu)
+static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
{
-    if (!tcg_enabled()) {
-        return;
+    tb_lock();
+
+    /* If it has already been done on request of another CPU,
+     * just retry.
+     */
+    if (tcg_ctx.tb_ctx.tb_flush_count != tb_flush_count.host_int) {
+        goto done;
    }
-#if defined(DEBUG_FLUSH)
+
+#if defined(DEBUG_TB_FLUSH)
    printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
           (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
           tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ?
@@ -856,7 +928,6 @@ void tb_flush(CPUState *cpu)
        for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
            atomic_set(&cpu->tb_jmp_cache[i], NULL);
        }
-        atomic_mb_set(&cpu->tb_flushed, true);
    }

    tcg_ctx.tb_ctx.nb_tbs = 0;
@@ -866,7 +937,20 @@ void tb_flush(CPUState *cpu)
    tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
    /* XXX: flush processor icache at this point if cache flush is expensive */
-    tcg_ctx.tb_ctx.tb_flush_count++;
+    atomic_mb_set(&tcg_ctx.tb_ctx.tb_flush_count,
+                  tcg_ctx.tb_ctx.tb_flush_count + 1);
+
+done:
+    tb_unlock();
+}
+
+void tb_flush(CPUState *cpu)
+{
+    if (tcg_enabled()) {
+        unsigned tb_flush_count = atomic_mb_read(&tcg_ctx.tb_ctx.tb_flush_count);
+        async_safe_run_on_cpu(cpu, do_tb_flush,
+                              RUN_ON_CPU_HOST_INT(tb_flush_count));
+    }
}

#ifdef DEBUG_TB_CHECK

@@ -883,6 +967,10 @@ do_tb_invalidate_check(struct qht *ht, void *p, uint32_t hash, void *userp)
    }
}

+/* verify that all the pages have correct rights for code
+ *
+ * Called with tb_lock held.
+ */
static void tb_invalidate_check(target_ulong address)
{
    address &= TARGET_PAGE_MASK;
@@ -987,7 +1075,10 @@ static inline void tb_jmp_unlink(TranslationBlock *tb)
    }
}

-/* invalidate one TB */
+/* invalidate one TB
+ *
+ * Called with tb_lock held.
+ */
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
    CPUState *cpu;
@@ -995,6 +1086,8 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
    uint32_t h;
    tb_page_addr_t phys_pc;

+    assert_tb_locked();
+
    atomic_set(&tb->invalid, true);

    /* remove the TB from the hash list */
@@ -1052,7 +1145,7 @@ static void build_page_bitmap(PageDesc *p)
            tb_end = tb_start + tb->size;
            if (tb_end > TARGET_PAGE_SIZE) {
                tb_end = TARGET_PAGE_SIZE;
-            } 
+            }
        } else {
            tb_start = 0;
            tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
@@ -1075,6 +1168,8 @@ static inline void tb_alloc_page(TranslationBlock *tb,
    bool page_already_protected;
#endif

+    assert_memory_lock();
+
    tb->page_addr[n] = page_addr;
    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
    tb->page_next[n] = p->first_tb;
@@ -1131,6 +1226,8 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
{
    uint32_t h;

+    assert_memory_lock();
+
    /* add in the page list */
    tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
    if (phys_page2 != -1) {
@@ -1162,6 +1259,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif
+    assert_memory_lock();

    phys_pc = get_page_addr_code(env, pc);
    if (use_icount && !(cflags & CF_IGNORE_ICOUNT)) {
@@ -1173,9 +1271,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 buffer_overflow:
        /* flush must be done */
        tb_flush(cpu);
-        /* cannot fail at this point */
-        tb = tb_alloc(pc);
-        assert(tb != NULL);
+        mmap_unlock();
+        /* Make the execution loop process the flush as soon as possible. */
+        cpu->exception_index = EXCP_INTERRUPT;
+        cpu_loop_exit(cpu);
    }

    gen_code_buf = tcg_ctx.code_gen_ptr;
@@ -1240,10 +1339,12 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
        qemu_log_in_addr_range(tb->pc)) {
+        qemu_log_lock();
        qemu_log("OUT: [size=%d]\n", gen_code_size);
        log_disas(tb->tc_ptr, gen_code_size);
        qemu_log("\n");
        qemu_log_flush();
+        qemu_log_unlock();
    }
#endif

@@ -1287,9 +1388,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 *
- * Called with mmap_lock held for user-mode emulation
+ * Called with mmap_lock held for user-mode emulation, grabs tb_lock
+ * Called with tb_lock held for system-mode emulation
 */
-void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
+static void tb_invalidate_phys_range_1(tb_page_addr_t start, tb_page_addr_t end)
{
    while (start < end) {
        tb_invalidate_phys_page_range(start, end, 0);
@@ -1298,6 +1400,21 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
    }
}

+#ifdef CONFIG_SOFTMMU
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
+{
+    assert_tb_locked();
+    tb_invalidate_phys_range_1(start, end);
+}
+#else
+void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end)
+{
+    assert_memory_lock();
+    tb_lock();
+    tb_invalidate_phys_range_1(start, end);
+    tb_unlock();
+}
+#endif
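The pair of variants above encodes a single lock-nesting rule: the memory lock (mmap_lock in user mode) is the outer lock and tb_lock nests inside it. A sketch of a correctly nested user-mode caller, for illustration only (real callers in this patch already hold mmap_lock and go through tb_invalidate_phys_range(), which takes tb_lock itself):

    /* Illustration of the nesting order the asserts above enforce. */
    static void invalidate_code_range(tb_page_addr_t start, tb_page_addr_t end)
    {
        mmap_lock();     /* outer: protects page tables and PageDesc */
        tb_lock();       /* inner: protects the TB hash table and lists */

        tb_invalidate_phys_range_1(start, end);

        tb_unlock();     /* release in reverse order */
        mmap_unlock();
    }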

/*
 * Invalidate all TBs which intersect with the target physical address range
 * [start, end). NOTE: start and end must refer to the *same* physical page.
 * 'is_cpu_write_access' should be true if called from a real cpu write
 * access: the virtual CPU will exit the current TB if code is modified inside
 * this TB.
 *
- * Called with mmap_lock held for user-mode emulation
+ * Called with tb_lock/mmap_lock held for user-mode emulation
+ * Called with tb_lock held for system-mode emulation
 */
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                                   int is_cpu_write_access)
@@ -1327,6 +1445,9 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
    uint32_t current_flags = 0;
#endif /* TARGET_HAS_PRECISE_SMC */

+    assert_memory_lock();
+    assert_tb_locked();
+
    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p) {
        return;
@@ -1402,7 +1523,10 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
}

#ifdef CONFIG_SOFTMMU
-/* len must be <= 8 and start must be a multiple of len */
+/* len must be <= 8 and start must be a multiple of len.
+ * Called via softmmu_template.h when code areas are written to with
+ * the iothread mutex not held.
+ */
void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
{
    PageDesc *p;
@@ -1416,13 +1540,17 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
                  (intptr_t)cpu_single_env->segs[R_CS].base);
    }
#endif
+    assert_memory_lock();
+
    p = page_find(start >> TARGET_PAGE_BITS);
    if (!p) {
        return;
    }
    if (!p->code_bitmap &&
        ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) {
-        /* build code bitmap */
+        /* build code bitmap. FIXME: writes should be protected by
+         * tb_lock, reads by tb_lock or RCU.
+         */
        build_page_bitmap(p);
    }
    if (p->code_bitmap) {
@@ -1461,11 +1589,15 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
    uint32_t current_flags = 0;
#endif

+    assert_memory_lock();
+
    addr &= TARGET_PAGE_MASK;
    p = page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        return false;
    }
+
+    tb_lock();
    tb = p->first_tb;
#ifdef TARGET_HAS_PRECISE_SMC
    if (tb && pc != 0) {
@@ -1503,9 +1635,13 @@ static bool tb_invalidate_phys_page(tb_page_addr_t addr, uintptr_t pc)
           modifying the memory. It will ensure that it cannot modify
           itself */
        tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
+        /* tb_lock will be reset after cpu_loop_exit_noexc longjmps
+         * back into the cpu_exec loop. */
        return true;
    }
#endif
+    tb_unlock();
+
    return false;
}
#endif
@@ -1558,11 +1694,14 @@ void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr)
        return;
    }
    ram_addr = memory_region_get_ram_addr(mr) + addr;
+    tb_lock();
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
+    tb_unlock();
    rcu_read_unlock();
}
#endif /* !defined(CONFIG_USER_ONLY) */
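The code_bitmap consulted above is what lets the fast path tell writes into translated code apart from ordinary data writes landing on the same page. A standalone sketch of the idea with invented names (the patch itself builds the bitmap with bitmap_new()/bitmap_set() from qemu/bitmap.h, one bit per byte of the target page):

    #include <limits.h>
    #include <stdbool.h>

    #define PAGE_BYTES    4096
    #define BITS_PER_WORD (sizeof(unsigned long) * CHAR_BIT)

    static unsigned long code_bitmap[PAGE_BYTES / BITS_PER_WORD];

    /* Record that [start, start + len) holds translated code. */
    static void mark_code_bytes(unsigned start, unsigned len)
    {
        for (unsigned i = start; i < start + len; i++) {
            code_bitmap[i / BITS_PER_WORD] |= 1UL << (i % BITS_PER_WORD);
        }
    }

    /* Does a write of len bytes at start overlap translated code? */
    static bool write_hits_code(unsigned start, unsigned len)
    {
        for (unsigned i = start; i < start + len; i++) {
            if (code_bitmap[i / BITS_PER_WORD] & (1UL << (i % BITS_PER_WORD))) {
                return true;    /* must invalidate this page's TBs */
            }
        }
        return false;           /* plain data write: leave the TBs alone */
    }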

+/* Called with tb_lock held. */
void tb_check_watchpoint(CPUState *cpu)
{
    TranslationBlock *tb;
@@ -1588,7 +1727,10 @@ void tb_check_watchpoint(CPUState *cpu)

#ifndef CONFIG_USER_ONLY
/* in deterministic execution mode, instructions doing device I/Os
-   must be at the end of the TB */
+ * must be at the end of the TB.
+ *
+ * Called by softmmu_template.h, with the iothread mutex not held.
+ */
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
{
#if defined(TARGET_MIPS) || defined(TARGET_SH4)
@@ -1599,6 +1741,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
    target_ulong pc, cs_base;
    uint32_t flags;

+    tb_lock();
    tb = tb_find_pc(retaddr);
    if (!tb) {
        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
@@ -1650,11 +1793,16 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
    /* FIXME: In theory this could raise an exception. In practice
       we have already translated the block once so it's probably ok. */
    tb_gen_code(cpu, pc, cs_base, flags, cflags);
+    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
-       the first in the TB) then we end up generating a whole new TB and
-       repeating the fault, which is horribly inefficient.
-       Better would be to execute just this insn uncached, or generate a
-       second new TB. */
+     * the first in the TB) then we end up generating a whole new TB and
+     * repeating the fault, which is horribly inefficient.
+     * Better would be to execute just this insn uncached, or generate a
+     * second new TB.
+     *
+     * cpu_loop_exit_noexc will longjmp back to cpu_exec where the
+     * tb_lock gets reset.
+     */
    cpu_loop_exit_noexc(cpu);
}

@@ -1718,6 +1866,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
    TranslationBlock *tb;
    struct qht_stats hst;

+    tb_lock();
+
    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
@@ -1773,11 +1923,14 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
    qht_statistics_destroy(&hst);

    cpu_fprintf(f, "\nStatistics:\n");
-    cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count);
+    cpu_fprintf(f, "TB flush count %u\n",
+                atomic_read(&tcg_ctx.tb_ctx.tb_flush_count));
    cpu_fprintf(f, "TB invalidate count %d\n",
                tcg_ctx.tb_ctx.tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
    tcg_dump_info(f, cpu_fprintf);
+
+    tb_unlock();
}

void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)
@@ -1789,8 +1942,9 @@ void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)

void cpu_interrupt(CPUState *cpu, int mask)
{
+    g_assert(qemu_mutex_iothread_locked());
    cpu->interrupt_request |= mask;
-    cpu->tcg_exit_req = 1;
+    cpu->icount_decr.u16.high = -1;
}

/*
@@ -1863,16 +2017,16 @@ static int walk_memory_regions_1(struct walk_memory_regions_data *data,
int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
{
    struct walk_memory_regions_data data;
-    uintptr_t i;
+    uintptr_t i, l1_sz = v_l1_size;

    data.fn = fn;
    data.priv = priv;
    data.start = -1u;
    data.prot = 0;

-    for (i = 0; i < V_L1_SIZE; i++) {
-        int rc = walk_memory_regions_1(&data, (target_ulong)i << (V_L1_SHIFT + TARGET_PAGE_BITS),
-                                       V_L1_SHIFT / V_L2_BITS - 1, l1_map + i);
+    for (i = 0; i < l1_sz; i++) {
+        target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
+        int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
        if (rc != 0) {
            return rc;
        }
@@ -1930,6 +2084,7 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
    assert(end < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
#endif
    assert(start < end);
+    assert_memory_lock();

    start = start & TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);
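One detail of the cpu_interrupt() change above is worth unpacking: rather than a dedicated tcg_exit_req flag, the interrupt request now reuses the 32-bit icount budget word that generated code already decrements and tests. Writing -1 into the high half forces that word negative, so the next TB boundary bails out to the main loop. A standalone sketch of the union trick (the little-endian half ordering shown is an assumption; QEMU picks the field order per host endianness):

    #include <stdint.h>
    #include <stdio.h>

    /* Simplified model of QEMU's icount_decr union. */
    typedef union {
        uint32_t u32;
        struct {
            uint16_t low;     /* instruction budget, counted down by TBs */
            uint16_t high;    /* asynchronous "please exit" half */
        } u16;
    } IcountDecr;

    int main(void)
    {
        IcountDecr d = { .u32 = 100 };    /* 100 instructions left to run */

        d.u16.high = (uint16_t)-1;        /* what cpu_interrupt() now does */

        /* Generated code tests the whole word; with the high half all ones
         * it reads as negative, so the TB exits to the main loop. */
        printf("%s\n", (int32_t)d.u32 < 0 ? "exit requested" : "keep running");
        return 0;
    }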