]> git.proxmox.com Git - mirror_qemu.git/blob - accel/tcg/translate-all.c
block: use bdrv_co_debug_event in coroutine context
[mirror_qemu.git] / accel / tcg / translate-all.c
1 /*
2 * Host code generation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21
22 #include "trace.h"
23 #include "disas/disas.h"
24 #include "exec/exec-all.h"
25 #include "tcg/tcg.h"
26 #if defined(CONFIG_USER_ONLY)
27 #include "qemu.h"
28 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
29 #include <sys/param.h>
30 #if __FreeBSD_version >= 700104
31 #define HAVE_KINFO_GETVMMAP
32 #define sigqueue sigqueue_freebsd /* avoid redefinition */
33 #include <sys/proc.h>
34 #include <machine/profile.h>
35 #define _KERNEL
36 #include <sys/user.h>
37 #undef _KERNEL
38 #undef sigqueue
39 #include <libutil.h>
40 #endif
41 #endif
42 #else
43 #include "exec/ram_addr.h"
44 #endif
45
46 #include "exec/cputlb.h"
47 #include "exec/translate-all.h"
48 #include "exec/translator.h"
49 #include "exec/tb-flush.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/main-loop.h"
53 #include "qemu/cacheinfo.h"
54 #include "qemu/timer.h"
55 #include "exec/log.h"
56 #include "sysemu/cpus.h"
57 #include "sysemu/cpu-timers.h"
58 #include "sysemu/tcg.h"
59 #include "qapi/error.h"
60 #include "hw/core/tcg-cpu-ops.h"
61 #include "tb-jmp-cache.h"
62 #include "tb-hash.h"
63 #include "tb-context.h"
64 #include "internal.h"
65 #include "perf.h"
66 #include "tcg/insn-start-words.h"
67
68 TBContext tb_ctx;
69
/*
 * Write VAL to the byte stream at P in signed LEB128 form.
 * Return the address just past the last byte written.
 */
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    for (;;) {
        int group = val & 0x7f;
        int sign_bit = group & 0x40;

        val >>= 7;

        /*
         * The value is fully encoded once what remains is nothing but
         * the sign extension of the 7-bit group just extracted.
         */
        if ((val == 0 && !sign_bit) || (val == -1 && sign_bit)) {
            *p++ = group;
            return p;
        }

        /* More groups follow: mark this one with the continuation bit. */
        *p++ = group | 0x80;
    }
}
91
/*
 * Decode a signed leb128 sequence at *PP; increment *PP past the
 * decoded value.  Return the decoded value.
 *
 * The value is accumulated as an unsigned 64-bit quantity so that
 * neither the per-group shift nor the final sign extension relies on
 * shifting into or out of the sign bit of a signed type.  The sign
 * extension is gated on the width of the returned type (64 bits), so
 * that any negative value that fits in int64_t round-trips exactly,
 * whatever the width of the guest's long.
 */
static int64_t decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    uint64_t val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        /* Groups beyond bit 63 cannot contribute; never shift by >= 64. */
        if (shift < 64) {
            val |= (uint64_t)(byte & 0x7f) << shift;
        }
        shift += 7;
    } while (byte & 0x80);

    /* Bit 6 of the final group is the sign; replicate it upwards. */
    if (shift < 64 && (byte & 0x40)) {
        val |= ~(uint64_t)0 << shift;
    }

    *pp = p;
    return (int64_t)val;
}
114
115 /* Encode the data collected about the instructions while compiling TB.
116 Place the data at BLOCK, and return the number of bytes consumed.
117
118 The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
119 which come from the target's insn_start data, followed by a uintptr_t
120 which comes from the host pc of the end of the code implementing the insn.
121
122 Each line of the table is encoded as sleb128 deltas from the previous
123 line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
124 That is, the first column is seeded with the guest pc, the last column
125 with the host pc, and the middle columns with zeros. */
126
127 static int encode_search(TranslationBlock *tb, uint8_t *block)
128 {
129 uint8_t *highwater = tcg_ctx->code_gen_highwater;
130 uint64_t *insn_data = tcg_ctx->gen_insn_data;
131 uint16_t *insn_end_off = tcg_ctx->gen_insn_end_off;
132 uint8_t *p = block;
133 int i, j, n;
134
135 for (i = 0, n = tb->icount; i < n; ++i) {
136 uint64_t prev, curr;
137
138 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
139 if (i == 0) {
140 prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
141 } else {
142 prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j];
143 }
144 curr = insn_data[i * TARGET_INSN_START_WORDS + j];
145 p = encode_sleb128(p, curr - prev);
146 }
147 prev = (i == 0 ? 0 : insn_end_off[i - 1]);
148 curr = insn_end_off[i];
149 p = encode_sleb128(p, curr - prev);
150
151 /* Test for (pending) buffer overflow. The assumption is that any
152 one row beginning below the high water mark cannot overrun
153 the buffer completely. Thus we can test for overflow after
154 encoding a row without having to check during encoding. */
155 if (unlikely(p > highwater)) {
156 return -1;
157 }
158 }
159
160 return p - block;
161 }
162
163 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
164 uint64_t *data)
165 {
166 uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
167 const uint8_t *p = tb->tc.ptr + tb->tc.size;
168 int i, j, num_insns = tb->icount;
169
170 host_pc -= GETPC_ADJ;
171
172 if (host_pc < iter_pc) {
173 return -1;
174 }
175
176 memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
177 if (!(tb_cflags(tb) & CF_PCREL)) {
178 data[0] = tb->pc;
179 }
180
181 /*
182 * Reconstruct the stored insn data while looking for the point
183 * at which the end of the insn exceeds host_pc.
184 */
185 for (i = 0; i < num_insns; ++i) {
186 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
187 data[j] += decode_sleb128(&p);
188 }
189 iter_pc += decode_sleb128(&p);
190 if (iter_pc > host_pc) {
191 return num_insns - i;
192 }
193 }
194 return -1;
195 }
196
197 /*
198 * The cpu state corresponding to 'host_pc' is restored in
199 * preparation for exiting the TB.
200 */
201 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
202 uintptr_t host_pc)
203 {
204 uint64_t data[TARGET_INSN_START_WORDS];
205 #ifdef CONFIG_PROFILER
206 TCGProfile *prof = &tcg_ctx->prof;
207 int64_t ti = profile_getclock();
208 #endif
209 int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
210
211 if (insns_left < 0) {
212 return;
213 }
214
215 if (tb_cflags(tb) & CF_USE_ICOUNT) {
216 assert(icount_enabled());
217 /*
218 * Reset the cycle counter to the start of the block and
219 * shift if to the number of actually executed instructions.
220 */
221 cpu_neg(cpu)->icount_decr.u16.low += insns_left;
222 }
223
224 cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
225
226 #ifdef CONFIG_PROFILER
227 qatomic_set(&prof->restore_time,
228 prof->restore_time + profile_getclock() - ti);
229 qatomic_set(&prof->restore_count, prof->restore_count + 1);
230 #endif
231 }
232
233 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
234 {
235 /*
236 * The host_pc has to be in the rx region of the code buffer.
237 * If it is not we will not be able to resolve it here.
238 * The two cases where host_pc will not be correct are:
239 *
240 * - fault during translation (instruction fetch)
241 * - fault from helper (not using GETPC() macro)
242 *
243 * Either way we need return early as we can't resolve it here.
244 */
245 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
246 TranslationBlock *tb = tcg_tb_lookup(host_pc);
247 if (tb) {
248 cpu_restore_state_from_tb(cpu, tb, host_pc);
249 return true;
250 }
251 }
252 return false;
253 }
254
255 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
256 {
257 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
258 TranslationBlock *tb = tcg_tb_lookup(host_pc);
259 if (tb) {
260 return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
261 }
262 }
263 return false;
264 }
265
/*
 * One-time page setup: runs page_size_init() and then
 * page_table_config_init(), in that order.
 */
void page_init(void)
{
    page_size_init();
    page_table_config_init();
}
271
272 /*
273 * Isolate the portion of code gen which can setjmp/longjmp.
274 * Return the size of the generated code, or negative on error.
275 */
276 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
277 target_ulong pc, void *host_pc,
278 int *max_insns, int64_t *ti)
279 {
280 int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
281 if (unlikely(ret != 0)) {
282 return ret;
283 }
284
285 tcg_func_start(tcg_ctx);
286
287 tcg_ctx->cpu = env_cpu(env);
288 gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
289 assert(tb->size != 0);
290 tcg_ctx->cpu = NULL;
291 *max_insns = tb->icount;
292
293 #ifdef CONFIG_PROFILER
294 qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
295 qatomic_set(&tcg_ctx->prof.interm_time,
296 tcg_ctx->prof.interm_time + profile_getclock() - *ti);
297 *ti = profile_getclock();
298 #endif
299
300 return tcg_gen_code(tcg_ctx, tb, pc);
301 }
302
303 /* Called with mmap_lock held for user mode emulation. */
304 TranslationBlock *tb_gen_code(CPUState *cpu,
305 target_ulong pc, target_ulong cs_base,
306 uint32_t flags, int cflags)
307 {
308 CPUArchState *env = cpu->env_ptr;
309 TranslationBlock *tb, *existing_tb;
310 tb_page_addr_t phys_pc;
311 tcg_insn_unit *gen_code_buf;
312 int gen_code_size, search_size, max_insns;
313 #ifdef CONFIG_PROFILER
314 TCGProfile *prof = &tcg_ctx->prof;
315 #endif
316 int64_t ti;
317 void *host_pc;
318
319 assert_memory_lock();
320 qemu_thread_jit_write();
321
322 phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
323
324 if (phys_pc == -1) {
325 /* Generate a one-shot TB with 1 insn in it */
326 cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
327 }
328
329 max_insns = cflags & CF_COUNT_MASK;
330 if (max_insns == 0) {
331 max_insns = TCG_MAX_INSNS;
332 }
333 QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
334
335 buffer_overflow:
336 tb = tcg_tb_alloc(tcg_ctx);
337 if (unlikely(!tb)) {
338 /* flush must be done */
339 tb_flush(cpu);
340 mmap_unlock();
341 /* Make the execution loop process the flush as soon as possible. */
342 cpu->exception_index = EXCP_INTERRUPT;
343 cpu_loop_exit(cpu);
344 }
345
346 gen_code_buf = tcg_ctx->code_gen_ptr;
347 tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
348 if (!(cflags & CF_PCREL)) {
349 tb->pc = pc;
350 }
351 tb->cs_base = cs_base;
352 tb->flags = flags;
353 tb->cflags = cflags;
354 tb_set_page_addr0(tb, phys_pc);
355 tb_set_page_addr1(tb, -1);
356 tcg_ctx->gen_tb = tb;
357 tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
358 #ifdef CONFIG_SOFTMMU
359 tcg_ctx->page_bits = TARGET_PAGE_BITS;
360 tcg_ctx->page_mask = TARGET_PAGE_MASK;
361 tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
362 tcg_ctx->tlb_fast_offset =
363 (int)offsetof(ArchCPU, neg.tlb.f) - (int)offsetof(ArchCPU, env);
364 #endif
365 tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS;
366 #ifdef TCG_GUEST_DEFAULT_MO
367 tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO;
368 #else
369 tcg_ctx->guest_mo = TCG_MO_ALL;
370 #endif
371
372 tb_overflow:
373
374 #ifdef CONFIG_PROFILER
375 /* includes aborted translations because of exceptions */
376 qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
377 ti = profile_getclock();
378 #endif
379
380 trace_translate_block(tb, pc, tb->tc.ptr);
381
382 gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
383 if (unlikely(gen_code_size < 0)) {
384 switch (gen_code_size) {
385 case -1:
386 /*
387 * Overflow of code_gen_buffer, or the current slice of it.
388 *
389 * TODO: We don't need to re-do gen_intermediate_code, nor
390 * should we re-do the tcg optimization currently hidden
391 * inside tcg_gen_code. All that should be required is to
392 * flush the TBs, allocate a new TB, re-initialize it per
393 * above, and re-do the actual code generation.
394 */
395 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
396 "Restarting code generation for "
397 "code_gen_buffer overflow\n");
398 goto buffer_overflow;
399
400 case -2:
401 /*
402 * The code generated for the TranslationBlock is too large.
403 * The maximum size allowed by the unwind info is 64k.
404 * There may be stricter constraints from relocations
405 * in the tcg backend.
406 *
407 * Try again with half as many insns as we attempted this time.
408 * If a single insn overflows, there's a bug somewhere...
409 */
410 assert(max_insns > 1);
411 max_insns /= 2;
412 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
413 "Restarting code generation with "
414 "smaller translation block (max %d insns)\n",
415 max_insns);
416 goto tb_overflow;
417
418 default:
419 g_assert_not_reached();
420 }
421 }
422 search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
423 if (unlikely(search_size < 0)) {
424 goto buffer_overflow;
425 }
426 tb->tc.size = gen_code_size;
427
428 /*
429 * For CF_PCREL, attribute all executions of the generated code
430 * to its first mapping.
431 */
432 perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
433
434 #ifdef CONFIG_PROFILER
435 qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
436 qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
437 qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
438 qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
439 #endif
440
441 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
442 qemu_log_in_addr_range(pc)) {
443 FILE *logfile = qemu_log_trylock();
444 if (logfile) {
445 int code_size, data_size;
446 const tcg_target_ulong *rx_data_gen_ptr;
447 size_t chunk_start;
448 int insn = 0;
449
450 if (tcg_ctx->data_gen_ptr) {
451 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
452 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
453 data_size = gen_code_size - code_size;
454 } else {
455 rx_data_gen_ptr = 0;
456 code_size = gen_code_size;
457 data_size = 0;
458 }
459
460 /* Dump header and the first instruction */
461 fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
462 fprintf(logfile,
463 " -- guest addr 0x%016" PRIx64 " + tb prologue\n",
464 tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
465 chunk_start = tcg_ctx->gen_insn_end_off[insn];
466 disas(logfile, tb->tc.ptr, chunk_start);
467
468 /*
469 * Dump each instruction chunk, wrapping up empty chunks into
470 * the next instruction. The whole array is offset so the
471 * first entry is the beginning of the 2nd instruction.
472 */
473 while (insn < tb->icount) {
474 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
475 if (chunk_end > chunk_start) {
476 fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
477 tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
478 disas(logfile, tb->tc.ptr + chunk_start,
479 chunk_end - chunk_start);
480 chunk_start = chunk_end;
481 }
482 insn++;
483 }
484
485 if (chunk_start < code_size) {
486 fprintf(logfile, " -- tb slow paths + alignment\n");
487 disas(logfile, tb->tc.ptr + chunk_start,
488 code_size - chunk_start);
489 }
490
491 /* Finally dump any data we may have after the block */
492 if (data_size) {
493 int i;
494 fprintf(logfile, " data: [size=%d]\n", data_size);
495 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
496 if (sizeof(tcg_target_ulong) == 8) {
497 fprintf(logfile,
498 "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
499 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
500 } else if (sizeof(tcg_target_ulong) == 4) {
501 fprintf(logfile,
502 "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
503 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
504 } else {
505 qemu_build_not_reached();
506 }
507 }
508 }
509 fprintf(logfile, "\n");
510 qemu_log_unlock(logfile);
511 }
512 }
513
514 qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
515 ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
516 CODE_GEN_ALIGN));
517
518 /* init jump list */
519 qemu_spin_init(&tb->jmp_lock);
520 tb->jmp_list_head = (uintptr_t)NULL;
521 tb->jmp_list_next[0] = (uintptr_t)NULL;
522 tb->jmp_list_next[1] = (uintptr_t)NULL;
523 tb->jmp_dest[0] = (uintptr_t)NULL;
524 tb->jmp_dest[1] = (uintptr_t)NULL;
525
526 /* init original jump addresses which have been set during tcg_gen_code() */
527 if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
528 tb_reset_jump(tb, 0);
529 }
530 if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
531 tb_reset_jump(tb, 1);
532 }
533
534 /*
535 * If the TB is not associated with a physical RAM page then it must be
536 * a temporary one-insn TB, and we have nothing left to do. Return early
537 * before attempting to link to other TBs or add to the lookup table.
538 */
539 if (tb_page_addr0(tb) == -1) {
540 return tb;
541 }
542
543 /*
544 * Insert TB into the corresponding region tree before publishing it
545 * through QHT. Otherwise rewinding happened in the TB might fail to
546 * lookup itself using host PC.
547 */
548 tcg_tb_insert(tb);
549
550 /*
551 * No explicit memory barrier is required -- tb_link_page() makes the
552 * TB visible in a consistent state.
553 */
554 existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
555 /* if the TB already exists, discard what we just translated */
556 if (unlikely(existing_tb != tb)) {
557 uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
558
559 orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
560 qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
561 tcg_tb_remove(tb);
562 return existing_tb;
563 }
564 return tb;
565 }
566
567 /* user-mode: call with mmap_lock held */
568 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
569 {
570 TranslationBlock *tb;
571
572 assert_memory_lock();
573
574 tb = tcg_tb_lookup(retaddr);
575 if (tb) {
576 /* We can use retranslation to find the PC. */
577 cpu_restore_state_from_tb(cpu, tb, retaddr);
578 tb_phys_invalidate(tb, -1);
579 } else {
580 /* The exception probably happened in a helper. The CPU state should
581 have been saved before calling it. Fetch the PC from there. */
582 CPUArchState *env = cpu->env_ptr;
583 target_ulong pc, cs_base;
584 tb_page_addr_t addr;
585 uint32_t flags;
586
587 cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
588 addr = get_page_addr_code(env, pc);
589 if (addr != -1) {
590 tb_invalidate_phys_range(addr, addr);
591 }
592 }
593 }
594
595 #ifndef CONFIG_USER_ONLY
596 /*
597 * In deterministic execution mode, instructions doing device I/Os
598 * must be at the end of the TB.
599 *
600 * Called by softmmu_template.h, with iothread mutex not held.
601 */
602 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
603 {
604 TranslationBlock *tb;
605 CPUClass *cc;
606 uint32_t n;
607
608 tb = tcg_tb_lookup(retaddr);
609 if (!tb) {
610 cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
611 (void *)retaddr);
612 }
613 cpu_restore_state_from_tb(cpu, tb, retaddr);
614
615 /*
616 * Some guests must re-execute the branch when re-executing a delay
617 * slot instruction. When this is the case, adjust icount and N
618 * to account for the re-execution of the branch.
619 */
620 n = 1;
621 cc = CPU_GET_CLASS(cpu);
622 if (cc->tcg_ops->io_recompile_replay_branch &&
623 cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
624 cpu_neg(cpu)->icount_decr.u16.low++;
625 n = 2;
626 }
627
628 /*
629 * Exit the loop and potentially generate a new TB executing the
630 * just the I/O insns. We also limit instrumentation to memory
631 * operations only (which execute after completion) so we don't
632 * double instrument the instruction.
633 */
634 cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
635
636 if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
637 target_ulong pc = log_pc(cpu, tb);
638 if (qemu_log_in_addr_range(pc)) {
639 qemu_log("cpu_io_recompile: rewound execution of TB to "
640 TARGET_FMT_lx "\n", pc);
641 }
642 }
643
644 cpu_loop_exit_noexc(cpu);
645 }
646
647 static void print_qht_statistics(struct qht_stats hst, GString *buf)
648 {
649 uint32_t hgram_opts;
650 size_t hgram_bins;
651 char *hgram;
652
653 if (!hst.head_buckets) {
654 return;
655 }
656 g_string_append_printf(buf, "TB hash buckets %zu/%zu "
657 "(%0.2f%% head buckets used)\n",
658 hst.used_head_buckets, hst.head_buckets,
659 (double)hst.used_head_buckets /
660 hst.head_buckets * 100);
661
662 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
663 hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
664 if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
665 hgram_opts |= QDIST_PR_NODECIMAL;
666 }
667 hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
668 g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
669 "Histogram: %s\n",
670 qdist_avg(&hst.occupancy) * 100, hgram);
671 g_free(hgram);
672
673 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
674 hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
675 if (hgram_bins > 10) {
676 hgram_bins = 10;
677 } else {
678 hgram_bins = 0;
679 hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
680 }
681 hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
682 g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
683 "Histogram: %s\n",
684 qdist_avg(&hst.chain), hgram);
685 g_free(hgram);
686 }
687
/* Totals accumulated over every TB visited by tb_tree_stats_iter(). */
struct tb_tree_stats {
    size_t nb_tbs;            /* number of TBs visited */
    size_t host_size;         /* sum of tb->tc.size */
    size_t target_size;       /* sum of tb->size */
    size_t max_target_size;   /* largest tb->size seen */
    size_t direct_jmp_count;  /* TBs with a valid jmp_reset_offset[0] */
    size_t direct_jmp2_count; /* ... that also have a valid slot 1 */
    size_t cross_page;        /* TBs whose second page address is set */
};
697
698 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
699 {
700 const TranslationBlock *tb = value;
701 struct tb_tree_stats *tst = data;
702
703 tst->nb_tbs++;
704 tst->host_size += tb->tc.size;
705 tst->target_size += tb->size;
706 if (tb->size > tst->max_target_size) {
707 tst->max_target_size = tb->size;
708 }
709 if (tb_page_addr1(tb) != -1) {
710 tst->cross_page++;
711 }
712 if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
713 tst->direct_jmp_count++;
714 if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
715 tst->direct_jmp2_count++;
716 }
717 }
718 return false;
719 }
720
721 void dump_exec_info(GString *buf)
722 {
723 struct tb_tree_stats tst = {};
724 struct qht_stats hst;
725 size_t nb_tbs, flush_full, flush_part, flush_elide;
726
727 tcg_tb_foreach(tb_tree_stats_iter, &tst);
728 nb_tbs = tst.nb_tbs;
729 /* XXX: avoid using doubles ? */
730 g_string_append_printf(buf, "Translation buffer state:\n");
731 /*
732 * Report total code size including the padding and TB structs;
733 * otherwise users might think "-accel tcg,tb-size" is not honoured.
734 * For avg host size we use the precise numbers from tb_tree_stats though.
735 */
736 g_string_append_printf(buf, "gen code size %zu/%zu\n",
737 tcg_code_size(), tcg_code_capacity());
738 g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
739 g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
740 nb_tbs ? tst.target_size / nb_tbs : 0,
741 tst.max_target_size);
742 g_string_append_printf(buf, "TB avg host size %zu bytes "
743 "(expansion ratio: %0.1f)\n",
744 nb_tbs ? tst.host_size / nb_tbs : 0,
745 tst.target_size ?
746 (double)tst.host_size / tst.target_size : 0);
747 g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
748 tst.cross_page,
749 nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
750 g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
751 "(2 jumps=%zu %zu%%)\n",
752 tst.direct_jmp_count,
753 nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
754 tst.direct_jmp2_count,
755 nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
756
757 qht_statistics_init(&tb_ctx.htable, &hst);
758 print_qht_statistics(hst, buf);
759 qht_statistics_destroy(&hst);
760
761 g_string_append_printf(buf, "\nStatistics:\n");
762 g_string_append_printf(buf, "TB flush count %u\n",
763 qatomic_read(&tb_ctx.tb_flush_count));
764 g_string_append_printf(buf, "TB invalidate count %u\n",
765 qatomic_read(&tb_ctx.tb_phys_invalidate_count));
766
767 tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
768 g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
769 g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
770 g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
771 tcg_dump_info(buf);
772 }
773
774 #else /* CONFIG_USER_ONLY */
775
776 void cpu_interrupt(CPUState *cpu, int mask)
777 {
778 g_assert(qemu_mutex_iothread_locked());
779 cpu->interrupt_request |= mask;
780 qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
781 }
782
783 #endif /* CONFIG_USER_ONLY */
784
785 /*
786 * Called by generic code at e.g. cpu reset after cpu creation,
787 * therefore we must be prepared to allocate the jump cache.
788 */
789 void tcg_flush_jmp_cache(CPUState *cpu)
790 {
791 CPUJumpCache *jc = cpu->tb_jmp_cache;
792
793 /* During early initialization, the cache may not yet be allocated. */
794 if (unlikely(jc == NULL)) {
795 return;
796 }
797
798 for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
799 qatomic_set(&jc->array[i].tb, NULL);
800 }
801 }
802
803 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
804 void tcg_flush_softmmu_tlb(CPUState *cs)
805 {
806 #ifdef CONFIG_SOFTMMU
807 tlb_flush(cs);
808 #endif
809 }