]> git.proxmox.com Git - mirror_qemu.git/blob - accel/tcg/translate-all.c
accel/tcg: Pass max_insn to gen_intermediate_code by pointer
[mirror_qemu.git] / accel / tcg / translate-all.c
1 /*
2 * Host code generation
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include "qemu/osdep.h"
21
22 #define NO_CPU_IO_DEFS
23 #include "trace.h"
24 #include "disas/disas.h"
25 #include "exec/exec-all.h"
26 #include "tcg/tcg.h"
27 #if defined(CONFIG_USER_ONLY)
28 #include "qemu.h"
29 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
30 #include <sys/param.h>
31 #if __FreeBSD_version >= 700104
32 #define HAVE_KINFO_GETVMMAP
33 #define sigqueue sigqueue_freebsd /* avoid redefinition */
34 #include <sys/proc.h>
35 #include <machine/profile.h>
36 #define _KERNEL
37 #include <sys/user.h>
38 #undef _KERNEL
39 #undef sigqueue
40 #include <libutil.h>
41 #endif
42 #endif
43 #else
44 #include "exec/ram_addr.h"
45 #endif
46
47 #include "exec/cputlb.h"
48 #include "exec/translate-all.h"
49 #include "exec/translator.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/qemu-print.h"
52 #include "qemu/main-loop.h"
53 #include "qemu/cacheinfo.h"
54 #include "exec/log.h"
55 #include "sysemu/cpus.h"
56 #include "sysemu/cpu-timers.h"
57 #include "sysemu/tcg.h"
58 #include "qapi/error.h"
59 #include "hw/core/tcg-cpu-ops.h"
60 #include "tb-jmp-cache.h"
61 #include "tb-hash.h"
62 #include "tb-context.h"
63 #include "internal.h"
64 #include "perf.h"
65
66 /* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
67 QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
68 sizeof_field(TranslationBlock, trace_vcpu_dstate)
69 * BITS_PER_BYTE);
70
71 TBContext tb_ctx;
72
73 /* Encode VAL as a signed leb128 sequence at P.
74 Return P incremented past the encoded value. */
75 static uint8_t *encode_sleb128(uint8_t *p, target_long val)
76 {
77 int more, byte;
78
79 do {
80 byte = val & 0x7f;
81 val >>= 7;
82 more = !((val == 0 && (byte & 0x40) == 0)
83 || (val == -1 && (byte & 0x40) != 0));
84 if (more) {
85 byte |= 0x80;
86 }
87 *p++ = byte;
88 } while (more);
89
90 return p;
91 }
92
93 /* Decode a signed leb128 sequence at *PP; increment *PP past the
94 decoded value. Return the decoded value. */
95 static target_long decode_sleb128(const uint8_t **pp)
96 {
97 const uint8_t *p = *pp;
98 target_long val = 0;
99 int byte, shift = 0;
100
101 do {
102 byte = *p++;
103 val |= (target_ulong)(byte & 0x7f) << shift;
104 shift += 7;
105 } while (byte & 0x80);
106 if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
107 val |= -(target_ulong)1 << shift;
108 }
109
110 *pp = p;
111 return val;
112 }
113
114 /* Encode the data collected about the instructions while compiling TB.
115 Place the data at BLOCK, and return the number of bytes consumed.
116
117 The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
118 which come from the target's insn_start data, followed by a uintptr_t
119 which comes from the host pc of the end of the code implementing the insn.
120
121 Each line of the table is encoded as sleb128 deltas from the previous
122 line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
123 That is, the first column is seeded with the guest pc, the last column
124 with the host pc, and the middle columns with zeros. */
125
126 static int encode_search(TranslationBlock *tb, uint8_t *block)
127 {
128 uint8_t *highwater = tcg_ctx->code_gen_highwater;
129 uint8_t *p = block;
130 int i, j, n;
131
132 for (i = 0, n = tb->icount; i < n; ++i) {
133 target_ulong prev;
134
135 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
136 if (i == 0) {
137 prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
138 } else {
139 prev = tcg_ctx->gen_insn_data[i - 1][j];
140 }
141 p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
142 }
143 prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
144 p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
145
146 /* Test for (pending) buffer overflow. The assumption is that any
147 one row beginning below the high water mark cannot overrun
148 the buffer completely. Thus we can test for overflow after
149 encoding a row without having to check during encoding. */
150 if (unlikely(p > highwater)) {
151 return -1;
152 }
153 }
154
155 return p - block;
156 }
157
158 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
159 uint64_t *data)
160 {
161 uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
162 const uint8_t *p = tb->tc.ptr + tb->tc.size;
163 int i, j, num_insns = tb->icount;
164
165 host_pc -= GETPC_ADJ;
166
167 if (host_pc < iter_pc) {
168 return -1;
169 }
170
171 memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
172 if (!(tb_cflags(tb) & CF_PCREL)) {
173 data[0] = tb->pc;
174 }
175
176 /*
177 * Reconstruct the stored insn data while looking for the point
178 * at which the end of the insn exceeds host_pc.
179 */
180 for (i = 0; i < num_insns; ++i) {
181 for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
182 data[j] += decode_sleb128(&p);
183 }
184 iter_pc += decode_sleb128(&p);
185 if (iter_pc > host_pc) {
186 return num_insns - i;
187 }
188 }
189 return -1;
190 }
191
192 /*
193 * The cpu state corresponding to 'host_pc' is restored in
194 * preparation for exiting the TB.
195 */
196 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
197 uintptr_t host_pc)
198 {
199 uint64_t data[TARGET_INSN_START_WORDS];
200 #ifdef CONFIG_PROFILER
201 TCGProfile *prof = &tcg_ctx->prof;
202 int64_t ti = profile_getclock();
203 #endif
204 int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
205
206 if (insns_left < 0) {
207 return;
208 }
209
210 if (tb_cflags(tb) & CF_USE_ICOUNT) {
211 assert(icount_enabled());
212 /*
213 * Reset the cycle counter to the start of the block and
214 * shift if to the number of actually executed instructions.
215 */
216 cpu_neg(cpu)->icount_decr.u16.low += insns_left;
217 }
218
219 cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
220
221 #ifdef CONFIG_PROFILER
222 qatomic_set(&prof->restore_time,
223 prof->restore_time + profile_getclock() - ti);
224 qatomic_set(&prof->restore_count, prof->restore_count + 1);
225 #endif
226 }
227
228 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
229 {
230 /*
231 * The host_pc has to be in the rx region of the code buffer.
232 * If it is not we will not be able to resolve it here.
233 * The two cases where host_pc will not be correct are:
234 *
235 * - fault during translation (instruction fetch)
236 * - fault from helper (not using GETPC() macro)
237 *
238 * Either way we need return early as we can't resolve it here.
239 */
240 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
241 TranslationBlock *tb = tcg_tb_lookup(host_pc);
242 if (tb) {
243 cpu_restore_state_from_tb(cpu, tb, host_pc);
244 return true;
245 }
246 }
247 return false;
248 }
249
250 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
251 {
252 if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
253 TranslationBlock *tb = tcg_tb_lookup(host_pc);
254 if (tb) {
255 return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
256 }
257 }
258 return false;
259 }
260
/*
 * One-time page setup: initialise the page size first, then the page
 * table configuration.
 */
void page_init(void)
{
    page_size_init();
    page_table_config_init();
}
266
267 /*
268 * Isolate the portion of code gen which can setjmp/longjmp.
269 * Return the size of the generated code, or negative on error.
270 */
271 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
272 target_ulong pc, void *host_pc,
273 int *max_insns, int64_t *ti)
274 {
275 int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
276 if (unlikely(ret != 0)) {
277 return ret;
278 }
279
280 tcg_func_start(tcg_ctx);
281
282 tcg_ctx->cpu = env_cpu(env);
283 gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
284 assert(tb->size != 0);
285 tcg_ctx->cpu = NULL;
286 *max_insns = tb->icount;
287
288 #ifdef CONFIG_PROFILER
289 qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
290 qatomic_set(&tcg_ctx->prof.interm_time,
291 tcg_ctx->prof.interm_time + profile_getclock() - *ti);
292 *ti = profile_getclock();
293 #endif
294
295 return tcg_gen_code(tcg_ctx, tb, pc);
296 }
297
298 /* Called with mmap_lock held for user mode emulation. */
299 TranslationBlock *tb_gen_code(CPUState *cpu,
300 target_ulong pc, target_ulong cs_base,
301 uint32_t flags, int cflags)
302 {
303 CPUArchState *env = cpu->env_ptr;
304 TranslationBlock *tb, *existing_tb;
305 tb_page_addr_t phys_pc;
306 tcg_insn_unit *gen_code_buf;
307 int gen_code_size, search_size, max_insns;
308 #ifdef CONFIG_PROFILER
309 TCGProfile *prof = &tcg_ctx->prof;
310 #endif
311 int64_t ti;
312 void *host_pc;
313
314 assert_memory_lock();
315 qemu_thread_jit_write();
316
317 phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
318
319 if (phys_pc == -1) {
320 /* Generate a one-shot TB with 1 insn in it */
321 cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
322 }
323
324 max_insns = cflags & CF_COUNT_MASK;
325 if (max_insns == 0) {
326 max_insns = TCG_MAX_INSNS;
327 }
328 QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
329
330 buffer_overflow:
331 tb = tcg_tb_alloc(tcg_ctx);
332 if (unlikely(!tb)) {
333 /* flush must be done */
334 tb_flush(cpu);
335 mmap_unlock();
336 /* Make the execution loop process the flush as soon as possible. */
337 cpu->exception_index = EXCP_INTERRUPT;
338 cpu_loop_exit(cpu);
339 }
340
341 gen_code_buf = tcg_ctx->code_gen_ptr;
342 tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
343 if (!(cflags & CF_PCREL)) {
344 tb->pc = pc;
345 }
346 tb->cs_base = cs_base;
347 tb->flags = flags;
348 tb->cflags = cflags;
349 tb->trace_vcpu_dstate = *cpu->trace_dstate;
350 tb_set_page_addr0(tb, phys_pc);
351 tb_set_page_addr1(tb, -1);
352 tcg_ctx->gen_tb = tb;
353 tb_overflow:
354
355 #ifdef CONFIG_PROFILER
356 /* includes aborted translations because of exceptions */
357 qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
358 ti = profile_getclock();
359 #endif
360
361 trace_translate_block(tb, pc, tb->tc.ptr);
362
363 gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
364 if (unlikely(gen_code_size < 0)) {
365 switch (gen_code_size) {
366 case -1:
367 /*
368 * Overflow of code_gen_buffer, or the current slice of it.
369 *
370 * TODO: We don't need to re-do gen_intermediate_code, nor
371 * should we re-do the tcg optimization currently hidden
372 * inside tcg_gen_code. All that should be required is to
373 * flush the TBs, allocate a new TB, re-initialize it per
374 * above, and re-do the actual code generation.
375 */
376 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
377 "Restarting code generation for "
378 "code_gen_buffer overflow\n");
379 goto buffer_overflow;
380
381 case -2:
382 /*
383 * The code generated for the TranslationBlock is too large.
384 * The maximum size allowed by the unwind info is 64k.
385 * There may be stricter constraints from relocations
386 * in the tcg backend.
387 *
388 * Try again with half as many insns as we attempted this time.
389 * If a single insn overflows, there's a bug somewhere...
390 */
391 assert(max_insns > 1);
392 max_insns /= 2;
393 qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
394 "Restarting code generation with "
395 "smaller translation block (max %d insns)\n",
396 max_insns);
397 goto tb_overflow;
398
399 default:
400 g_assert_not_reached();
401 }
402 }
403 search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
404 if (unlikely(search_size < 0)) {
405 goto buffer_overflow;
406 }
407 tb->tc.size = gen_code_size;
408
409 /*
410 * For CF_PCREL, attribute all executions of the generated code
411 * to its first mapping.
412 */
413 perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
414
415 #ifdef CONFIG_PROFILER
416 qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
417 qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
418 qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
419 qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
420 #endif
421
422 #ifdef DEBUG_DISAS
423 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
424 qemu_log_in_addr_range(pc)) {
425 FILE *logfile = qemu_log_trylock();
426 if (logfile) {
427 int code_size, data_size;
428 const tcg_target_ulong *rx_data_gen_ptr;
429 size_t chunk_start;
430 int insn = 0;
431
432 if (tcg_ctx->data_gen_ptr) {
433 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
434 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
435 data_size = gen_code_size - code_size;
436 } else {
437 rx_data_gen_ptr = 0;
438 code_size = gen_code_size;
439 data_size = 0;
440 }
441
442 /* Dump header and the first instruction */
443 fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
444 fprintf(logfile,
445 " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
446 tcg_ctx->gen_insn_data[insn][0]);
447 chunk_start = tcg_ctx->gen_insn_end_off[insn];
448 disas(logfile, tb->tc.ptr, chunk_start);
449
450 /*
451 * Dump each instruction chunk, wrapping up empty chunks into
452 * the next instruction. The whole array is offset so the
453 * first entry is the beginning of the 2nd instruction.
454 */
455 while (insn < tb->icount) {
456 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
457 if (chunk_end > chunk_start) {
458 fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n",
459 tcg_ctx->gen_insn_data[insn][0]);
460 disas(logfile, tb->tc.ptr + chunk_start,
461 chunk_end - chunk_start);
462 chunk_start = chunk_end;
463 }
464 insn++;
465 }
466
467 if (chunk_start < code_size) {
468 fprintf(logfile, " -- tb slow paths + alignment\n");
469 disas(logfile, tb->tc.ptr + chunk_start,
470 code_size - chunk_start);
471 }
472
473 /* Finally dump any data we may have after the block */
474 if (data_size) {
475 int i;
476 fprintf(logfile, " data: [size=%d]\n", data_size);
477 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
478 if (sizeof(tcg_target_ulong) == 8) {
479 fprintf(logfile,
480 "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
481 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
482 } else if (sizeof(tcg_target_ulong) == 4) {
483 fprintf(logfile,
484 "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
485 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
486 } else {
487 qemu_build_not_reached();
488 }
489 }
490 }
491 fprintf(logfile, "\n");
492 qemu_log_unlock(logfile);
493 }
494 }
495 #endif
496
497 qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
498 ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
499 CODE_GEN_ALIGN));
500
501 /* init jump list */
502 qemu_spin_init(&tb->jmp_lock);
503 tb->jmp_list_head = (uintptr_t)NULL;
504 tb->jmp_list_next[0] = (uintptr_t)NULL;
505 tb->jmp_list_next[1] = (uintptr_t)NULL;
506 tb->jmp_dest[0] = (uintptr_t)NULL;
507 tb->jmp_dest[1] = (uintptr_t)NULL;
508
509 /* init original jump addresses which have been set during tcg_gen_code() */
510 if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
511 tb_reset_jump(tb, 0);
512 }
513 if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
514 tb_reset_jump(tb, 1);
515 }
516
517 /*
518 * If the TB is not associated with a physical RAM page then it must be
519 * a temporary one-insn TB, and we have nothing left to do. Return early
520 * before attempting to link to other TBs or add to the lookup table.
521 */
522 if (tb_page_addr0(tb) == -1) {
523 return tb;
524 }
525
526 /*
527 * Insert TB into the corresponding region tree before publishing it
528 * through QHT. Otherwise rewinding happened in the TB might fail to
529 * lookup itself using host PC.
530 */
531 tcg_tb_insert(tb);
532
533 /*
534 * No explicit memory barrier is required -- tb_link_page() makes the
535 * TB visible in a consistent state.
536 */
537 existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
538 /* if the TB already exists, discard what we just translated */
539 if (unlikely(existing_tb != tb)) {
540 uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
541
542 orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
543 qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
544 tcg_tb_remove(tb);
545 return existing_tb;
546 }
547 return tb;
548 }
549
550 /* user-mode: call with mmap_lock held */
551 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
552 {
553 TranslationBlock *tb;
554
555 assert_memory_lock();
556
557 tb = tcg_tb_lookup(retaddr);
558 if (tb) {
559 /* We can use retranslation to find the PC. */
560 cpu_restore_state_from_tb(cpu, tb, retaddr);
561 tb_phys_invalidate(tb, -1);
562 } else {
563 /* The exception probably happened in a helper. The CPU state should
564 have been saved before calling it. Fetch the PC from there. */
565 CPUArchState *env = cpu->env_ptr;
566 target_ulong pc, cs_base;
567 tb_page_addr_t addr;
568 uint32_t flags;
569
570 cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
571 addr = get_page_addr_code(env, pc);
572 if (addr != -1) {
573 tb_invalidate_phys_range(addr, addr + 1);
574 }
575 }
576 }
577
578 #ifndef CONFIG_USER_ONLY
579 /*
580 * In deterministic execution mode, instructions doing device I/Os
581 * must be at the end of the TB.
582 *
583 * Called by softmmu_template.h, with iothread mutex not held.
584 */
585 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
586 {
587 TranslationBlock *tb;
588 CPUClass *cc;
589 uint32_t n;
590
591 tb = tcg_tb_lookup(retaddr);
592 if (!tb) {
593 cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
594 (void *)retaddr);
595 }
596 cpu_restore_state_from_tb(cpu, tb, retaddr);
597
598 /*
599 * Some guests must re-execute the branch when re-executing a delay
600 * slot instruction. When this is the case, adjust icount and N
601 * to account for the re-execution of the branch.
602 */
603 n = 1;
604 cc = CPU_GET_CLASS(cpu);
605 if (cc->tcg_ops->io_recompile_replay_branch &&
606 cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
607 cpu_neg(cpu)->icount_decr.u16.low++;
608 n = 2;
609 }
610
611 /*
612 * Exit the loop and potentially generate a new TB executing the
613 * just the I/O insns. We also limit instrumentation to memory
614 * operations only (which execute after completion) so we don't
615 * double instrument the instruction.
616 */
617 cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
618
619 if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
620 target_ulong pc = log_pc(cpu, tb);
621 if (qemu_log_in_addr_range(pc)) {
622 qemu_log("cpu_io_recompile: rewound execution of TB to "
623 TARGET_FMT_lx "\n", pc);
624 }
625 }
626
627 cpu_loop_exit_noexc(cpu);
628 }
629
630 static void print_qht_statistics(struct qht_stats hst, GString *buf)
631 {
632 uint32_t hgram_opts;
633 size_t hgram_bins;
634 char *hgram;
635
636 if (!hst.head_buckets) {
637 return;
638 }
639 g_string_append_printf(buf, "TB hash buckets %zu/%zu "
640 "(%0.2f%% head buckets used)\n",
641 hst.used_head_buckets, hst.head_buckets,
642 (double)hst.used_head_buckets /
643 hst.head_buckets * 100);
644
645 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
646 hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
647 if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
648 hgram_opts |= QDIST_PR_NODECIMAL;
649 }
650 hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
651 g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
652 "Histogram: %s\n",
653 qdist_avg(&hst.occupancy) * 100, hgram);
654 g_free(hgram);
655
656 hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
657 hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
658 if (hgram_bins > 10) {
659 hgram_bins = 10;
660 } else {
661 hgram_bins = 0;
662 hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
663 }
664 hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
665 g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
666 "Histogram: %s\n",
667 qdist_avg(&hst.chain), hgram);
668 g_free(hgram);
669 }
670
/* Totals accumulated by tb_tree_stats_iter() over the visited TBs. */
struct tb_tree_stats {
    size_t nb_tbs;              /* number of TBs visited */
    size_t host_size;           /* sum of tb->tc.size */
    size_t target_size;         /* sum of tb->size */
    size_t max_target_size;     /* largest tb->size seen */
    size_t direct_jmp_count;    /* TBs with a valid jmp_reset_offset[0] */
    size_t direct_jmp2_count;   /* ... that also have a valid offset [1] */
    size_t cross_page;          /* TBs with a second page address set */
};
680
681 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
682 {
683 const TranslationBlock *tb = value;
684 struct tb_tree_stats *tst = data;
685
686 tst->nb_tbs++;
687 tst->host_size += tb->tc.size;
688 tst->target_size += tb->size;
689 if (tb->size > tst->max_target_size) {
690 tst->max_target_size = tb->size;
691 }
692 if (tb_page_addr1(tb) != -1) {
693 tst->cross_page++;
694 }
695 if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
696 tst->direct_jmp_count++;
697 if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
698 tst->direct_jmp2_count++;
699 }
700 }
701 return false;
702 }
703
704 void dump_exec_info(GString *buf)
705 {
706 struct tb_tree_stats tst = {};
707 struct qht_stats hst;
708 size_t nb_tbs, flush_full, flush_part, flush_elide;
709
710 tcg_tb_foreach(tb_tree_stats_iter, &tst);
711 nb_tbs = tst.nb_tbs;
712 /* XXX: avoid using doubles ? */
713 g_string_append_printf(buf, "Translation buffer state:\n");
714 /*
715 * Report total code size including the padding and TB structs;
716 * otherwise users might think "-accel tcg,tb-size" is not honoured.
717 * For avg host size we use the precise numbers from tb_tree_stats though.
718 */
719 g_string_append_printf(buf, "gen code size %zu/%zu\n",
720 tcg_code_size(), tcg_code_capacity());
721 g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
722 g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
723 nb_tbs ? tst.target_size / nb_tbs : 0,
724 tst.max_target_size);
725 g_string_append_printf(buf, "TB avg host size %zu bytes "
726 "(expansion ratio: %0.1f)\n",
727 nb_tbs ? tst.host_size / nb_tbs : 0,
728 tst.target_size ?
729 (double)tst.host_size / tst.target_size : 0);
730 g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
731 tst.cross_page,
732 nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
733 g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
734 "(2 jumps=%zu %zu%%)\n",
735 tst.direct_jmp_count,
736 nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
737 tst.direct_jmp2_count,
738 nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
739
740 qht_statistics_init(&tb_ctx.htable, &hst);
741 print_qht_statistics(hst, buf);
742 qht_statistics_destroy(&hst);
743
744 g_string_append_printf(buf, "\nStatistics:\n");
745 g_string_append_printf(buf, "TB flush count %u\n",
746 qatomic_read(&tb_ctx.tb_flush_count));
747 g_string_append_printf(buf, "TB invalidate count %u\n",
748 qatomic_read(&tb_ctx.tb_phys_invalidate_count));
749
750 tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
751 g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
752 g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
753 g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
754 tcg_dump_info(buf);
755 }
756
757 #else /* CONFIG_USER_ONLY */
758
759 void cpu_interrupt(CPUState *cpu, int mask)
760 {
761 g_assert(qemu_mutex_iothread_locked());
762 cpu->interrupt_request |= mask;
763 qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
764 }
765
766 #endif /* CONFIG_USER_ONLY */
767
768 /*
769 * Called by generic code at e.g. cpu reset after cpu creation,
770 * therefore we must be prepared to allocate the jump cache.
771 */
772 void tcg_flush_jmp_cache(CPUState *cpu)
773 {
774 CPUJumpCache *jc = cpu->tb_jmp_cache;
775
776 /* During early initialization, the cache may not yet be allocated. */
777 if (unlikely(jc == NULL)) {
778 return;
779 }
780
781 for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
782 qatomic_set(&jc->array[i].tb, NULL);
783 }
784 }
785
786 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
787 void tcg_flush_softmmu_tlb(CPUState *cs)
788 {
789 #ifdef CONFIG_SOFTMMU
790 tlb_flush(cs);
791 #endif
792 }