accel/tcg/translate-all.c

   1 /*
   2  *  Host code generation
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21
  22 #include "trace.h"
  23 #include "disas/disas.h"
  24 #include "exec/exec-all.h"
  25 #include "tcg/tcg.h"
  26 #if defined(CONFIG_USER_ONLY)
  27 #include "qemu.h"
  28 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  29 #include <sys/param.h>
  30 #if __FreeBSD_version >= 700104
  31 #define HAVE_KINFO_GETVMMAP
  32 #define sigqueue sigqueue_freebsd  /* avoid redefinition */
  33 #include <sys/proc.h>
  34 #include <machine/profile.h>
  35 #define _KERNEL
  36 #include <sys/user.h>
  37 #undef _KERNEL
  38 #undef sigqueue
  39 #include <libutil.h>
  40 #endif
  41 #endif
  42 #else
  43 #include "exec/ram_addr.h"
  44 #endif
  45
  46 #include "exec/cputlb.h"
  47 #include "exec/translate-all.h"
  48 #include "exec/translator.h"
  49 #include "exec/tb-flush.h"
  50 #include "qemu/bitmap.h"
  51 #include "qemu/qemu-print.h"
  52 #include "qemu/main-loop.h"
  53 #include "qemu/cacheinfo.h"
  54 #include "qemu/timer.h"
  55 #include "exec/log.h"
  56 #include "sysemu/cpus.h"
  57 #include "sysemu/cpu-timers.h"
  58 #include "sysemu/tcg.h"
  59 #include "qapi/error.h"
  60 #include "hw/core/tcg-cpu-ops.h"
  61 #include "tb-jmp-cache.h"
  62 #include "tb-hash.h"
  63 #include "tb-context.h"
  64 #include "internal.h"
  65 #include "perf.h"
  66 #include "tcg/insn-start-words.h"
  67
/*
 * Global translation-block context.  Within this file its hash table
 * (htable) and its flush/invalidate counters are read by dump_exec_info().
 */
TBContext tb_ctx;
  69
  70 /*
  71  * Encode VAL as a signed leb128 sequence at P.
  72  * Return P incremented past the encoded value.
  73  */
  74 static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
  75 {
  76     int more, byte;
  77
  78     do {
  79         byte = val & 0x7f;
  80         val >>= 7;
  81         more = !((val == 0 && (byte & 0x40) == 0)
  82                  || (val == -1 && (byte & 0x40) != 0));
  83         if (more) {
  84             byte |= 0x80;
  85         }
  86         *p++ = byte;
  87     } while (more);
  88
  89     return p;
  90 }
  91
  92 /*
  93  * Decode a signed leb128 sequence at *PP; increment *PP past the
  94  * decoded value.  Return the decoded value.
  95  */
  96 static int64_t decode_sleb128(const uint8_t **pp)
  97 {
  98     const uint8_t *p = *pp;
  99     int64_t val = 0;
 100     int byte, shift = 0;
 101
 102     do {
 103         byte = *p++;
 104         val |= (int64_t)(byte & 0x7f) << shift;
 105         shift += 7;
 106     } while (byte & 0x80);
 107     if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
 108         val |= -(int64_t)1 << shift;
 109     }
 110
 111     *pp = p;
 112     return val;
 113 }
 114
 115 /* Encode the data collected about the instructions while compiling TB.
 116    Place the data at BLOCK, and return the number of bytes consumed.
 117
 118    The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
 119    which come from the target's insn_start data, followed by a uintptr_t
 120    which comes from the host pc of the end of the code implementing the insn.
 121
 122    Each line of the table is encoded as sleb128 deltas from the previous
 123    line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
 124    That is, the first column is seeded with the guest pc, the last column
 125    with the host pc, and the middle columns with zeros.  */
 126
 127 static int encode_search(TranslationBlock *tb, uint8_t *block)
 128 {
 129     uint8_t *highwater = tcg_ctx->code_gen_highwater;
 130     uint64_t *insn_data = tcg_ctx->gen_insn_data;
 131     uint16_t *insn_end_off = tcg_ctx->gen_insn_end_off;
 132     uint8_t *p = block;
 133     int i, j, n;
 134
 135     for (i = 0, n = tb->icount; i < n; ++i) {
 136         uint64_t prev, curr;
 137
 138         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
 139             if (i == 0) {
 140                 prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
 141             } else {
 142                 prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j];
 143             }
 144             curr = insn_data[i * TARGET_INSN_START_WORDS + j];
 145             p = encode_sleb128(p, curr - prev);
 146         }
 147         prev = (i == 0 ? 0 : insn_end_off[i - 1]);
 148         curr = insn_end_off[i];
 149         p = encode_sleb128(p, curr - prev);
 150
 151         /* Test for (pending) buffer overflow.  The assumption is that any
 152            one row beginning below the high water mark cannot overrun
 153            the buffer completely.  Thus we can test for overflow after
 154            encoding a row without having to check during encoding.  */
 155         if (unlikely(p > highwater)) {
 156             return -1;
 157         }
 158     }
 159
 160     return p - block;
 161 }
 162
 163 static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
 164                                    uint64_t *data)
 165 {
 166     uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
 167     const uint8_t *p = tb->tc.ptr + tb->tc.size;
 168     int i, j, num_insns = tb->icount;
 169
 170     host_pc -= GETPC_ADJ;
 171
 172     if (host_pc < iter_pc) {
 173         return -1;
 174     }
 175
 176     memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
 177     if (!(tb_cflags(tb) & CF_PCREL)) {
 178         data[0] = tb->pc;
 179     }
 180
 181     /*
 182      * Reconstruct the stored insn data while looking for the point
 183      * at which the end of the insn exceeds host_pc.
 184      */
 185     for (i = 0; i < num_insns; ++i) {
 186         for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
 187             data[j] += decode_sleb128(&p);
 188         }
 189         iter_pc += decode_sleb128(&p);
 190         if (iter_pc > host_pc) {
 191             return num_insns - i;
 192         }
 193     }
 194     return -1;
 195 }
 196
 197 /*
 198  * The cpu state corresponding to 'host_pc' is restored in
 199  * preparation for exiting the TB.
 200  */
 201 void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
 202                                uintptr_t host_pc)
 203 {
 204     uint64_t data[TARGET_INSN_START_WORDS];
 205 #ifdef CONFIG_PROFILER
 206     TCGProfile *prof = &tcg_ctx->prof;
 207     int64_t ti = profile_getclock();
 208 #endif
 209     int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
 210
 211     if (insns_left < 0) {
 212         return;
 213     }
 214
 215     if (tb_cflags(tb) & CF_USE_ICOUNT) {
 216         assert(icount_enabled());
 217         /*
 218          * Reset the cycle counter to the start of the block and
 219          * shift if to the number of actually executed instructions.
 220          */
 221         cpu_neg(cpu)->icount_decr.u16.low += insns_left;
 222     }
 223
 224     cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
 225
 226 #ifdef CONFIG_PROFILER
 227     qatomic_set(&prof->restore_time,
 228                 prof->restore_time + profile_getclock() - ti);
 229     qatomic_set(&prof->restore_count, prof->restore_count + 1);
 230 #endif
 231 }
 232
 233 bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
 234 {
 235     /*
 236      * The host_pc has to be in the rx region of the code buffer.
 237      * If it is not we will not be able to resolve it here.
 238      * The two cases where host_pc will not be correct are:
 239      *
 240      *  - fault during translation (instruction fetch)
 241      *  - fault from helper (not using GETPC() macro)
 242      *
 243      * Either way we need return early as we can't resolve it here.
 244      */
 245     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
 246         TranslationBlock *tb = tcg_tb_lookup(host_pc);
 247         if (tb) {
 248             cpu_restore_state_from_tb(cpu, tb, host_pc);
 249             return true;
 250         }
 251     }
 252     return false;
 253 }
 254
 255 bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
 256 {
 257     if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
 258         TranslationBlock *tb = tcg_tb_lookup(host_pc);
 259         if (tb) {
 260             return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
 261         }
 262     }
 263     return false;
 264 }
 265
 266 void page_init(void)
 267 {
 268     page_size_init();
 269     page_table_config_init();
 270 }
 271
 272 /*
 273  * Isolate the portion of code gen which can setjmp/longjmp.
 274  * Return the size of the generated code, or negative on error.
 275  */
 276 static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
 277                            target_ulong pc, void *host_pc,
 278                            int *max_insns, int64_t *ti)
 279 {
 280     int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
 281     if (unlikely(ret != 0)) {
 282         return ret;
 283     }
 284
 285     tcg_func_start(tcg_ctx);
 286
 287     tcg_ctx->cpu = env_cpu(env);
 288     gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
 289     assert(tb->size != 0);
 290     tcg_ctx->cpu = NULL;
 291     *max_insns = tb->icount;
 292
 293 #ifdef CONFIG_PROFILER
 294     qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
 295     qatomic_set(&tcg_ctx->prof.interm_time,
 296                 tcg_ctx->prof.interm_time + profile_getclock() - *ti);
 297     *ti = profile_getclock();
 298 #endif
 299
 300     return tcg_gen_code(tcg_ctx, tb, pc);
 301 }
 302
 303 /* Called with mmap_lock held for user mode emulation.  */
 304 TranslationBlock *tb_gen_code(CPUState *cpu,
 305                               target_ulong pc, target_ulong cs_base,
 306                               uint32_t flags, int cflags)
 307 {
 308     CPUArchState *env = cpu->env_ptr;
 309     TranslationBlock *tb, *existing_tb;
 310     tb_page_addr_t phys_pc;
 311     tcg_insn_unit *gen_code_buf;
 312     int gen_code_size, search_size, max_insns;
 313 #ifdef CONFIG_PROFILER
 314     TCGProfile *prof = &tcg_ctx->prof;
 315 #endif
 316     int64_t ti;
 317     void *host_pc;
 318
 319     assert_memory_lock();
 320     qemu_thread_jit_write();
 321
 322     phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
 323
 324     if (phys_pc == -1) {
 325         /* Generate a one-shot TB with 1 insn in it */
 326         cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
 327     }
 328
 329     max_insns = cflags & CF_COUNT_MASK;
 330     if (max_insns == 0) {
 331         max_insns = TCG_MAX_INSNS;
 332     }
 333     QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
 334
 335  buffer_overflow:
 336     tb = tcg_tb_alloc(tcg_ctx);
 337     if (unlikely(!tb)) {
 338         /* flush must be done */
 339         tb_flush(cpu);
 340         mmap_unlock();
 341         /* Make the execution loop process the flush as soon as possible.  */
 342         cpu->exception_index = EXCP_INTERRUPT;
 343         cpu_loop_exit(cpu);
 344     }
 345
 346     gen_code_buf = tcg_ctx->code_gen_ptr;
 347     tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
 348     if (!(cflags & CF_PCREL)) {
 349         tb->pc = pc;
 350     }
 351     tb->cs_base = cs_base;
 352     tb->flags = flags;
 353     tb->cflags = cflags;
 354     tb_set_page_addr0(tb, phys_pc);
 355     tb_set_page_addr1(tb, -1);
 356     tcg_ctx->gen_tb = tb;
 357     tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
 358 #ifdef CONFIG_SOFTMMU
 359     tcg_ctx->page_bits = TARGET_PAGE_BITS;
 360     tcg_ctx->page_mask = TARGET_PAGE_MASK;
 361     tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
 362     tcg_ctx->tlb_fast_offset =
 363         (int)offsetof(ArchCPU, neg.tlb.f) - (int)offsetof(ArchCPU, env);
 364 #endif
 365     tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS;
 366 #ifdef TCG_GUEST_DEFAULT_MO
 367     tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO;
 368 #else
 369     tcg_ctx->guest_mo = TCG_MO_ALL;
 370 #endif
 371
 372  tb_overflow:
 373
 374 #ifdef CONFIG_PROFILER
 375     /* includes aborted translations because of exceptions */
 376     qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
 377     ti = profile_getclock();
 378 #endif
 379
 380     trace_translate_block(tb, pc, tb->tc.ptr);
 381
 382     gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
 383     if (unlikely(gen_code_size < 0)) {
 384         switch (gen_code_size) {
 385         case -1:
 386             /*
 387              * Overflow of code_gen_buffer, or the current slice of it.
 388              *
 389              * TODO: We don't need to re-do gen_intermediate_code, nor
 390              * should we re-do the tcg optimization currently hidden
 391              * inside tcg_gen_code.  All that should be required is to
 392              * flush the TBs, allocate a new TB, re-initialize it per
 393              * above, and re-do the actual code generation.
 394              */
 395             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
 396                           "Restarting code generation for "
 397                           "code_gen_buffer overflow\n");
 398             goto buffer_overflow;
 399
 400         case -2:
 401             /*
 402              * The code generated for the TranslationBlock is too large.
 403              * The maximum size allowed by the unwind info is 64k.
 404              * There may be stricter constraints from relocations
 405              * in the tcg backend.
 406              *
 407              * Try again with half as many insns as we attempted this time.
 408              * If a single insn overflows, there's a bug somewhere...
 409              */
 410             assert(max_insns > 1);
 411             max_insns /= 2;
 412             qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
 413                           "Restarting code generation with "
 414                           "smaller translation block (max %d insns)\n",
 415                           max_insns);
 416             goto tb_overflow;
 417
 418         default:
 419             g_assert_not_reached();
 420         }
 421     }
 422     search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
 423     if (unlikely(search_size < 0)) {
 424         goto buffer_overflow;
 425     }
 426     tb->tc.size = gen_code_size;
 427
 428     /*
 429      * For CF_PCREL, attribute all executions of the generated code
 430      * to its first mapping.
 431      */
 432     perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
 433
 434 #ifdef CONFIG_PROFILER
 435     qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
 436     qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
 437     qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
 438     qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
 439 #endif
 440
 441     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
 442         qemu_log_in_addr_range(pc)) {
 443         FILE *logfile = qemu_log_trylock();
 444         if (logfile) {
 445             int code_size, data_size;
 446             const tcg_target_ulong *rx_data_gen_ptr;
 447             size_t chunk_start;
 448             int insn = 0;
 449
 450             if (tcg_ctx->data_gen_ptr) {
 451                 rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
 452                 code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
 453                 data_size = gen_code_size - code_size;
 454             } else {
 455                 rx_data_gen_ptr = 0;
 456                 code_size = gen_code_size;
 457                 data_size = 0;
 458             }
 459
 460             /* Dump header and the first instruction */
 461             fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
 462             fprintf(logfile,
 463                     "  -- guest addr 0x%016" PRIx64 " + tb prologue\n",
 464                     tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
 465             chunk_start = tcg_ctx->gen_insn_end_off[insn];
 466             disas(logfile, tb->tc.ptr, chunk_start);
 467
 468             /*
 469              * Dump each instruction chunk, wrapping up empty chunks into
 470              * the next instruction. The whole array is offset so the
 471              * first entry is the beginning of the 2nd instruction.
 472              */
 473             while (insn < tb->icount) {
 474                 size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
 475                 if (chunk_end > chunk_start) {
 476                     fprintf(logfile, "  -- guest addr 0x%016" PRIx64 "\n",
 477                             tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
 478                     disas(logfile, tb->tc.ptr + chunk_start,
 479                           chunk_end - chunk_start);
 480                     chunk_start = chunk_end;
 481                 }
 482                 insn++;
 483             }
 484
 485             if (chunk_start < code_size) {
 486                 fprintf(logfile, "  -- tb slow paths + alignment\n");
 487                 disas(logfile, tb->tc.ptr + chunk_start,
 488                       code_size - chunk_start);
 489             }
 490
 491             /* Finally dump any data we may have after the block */
 492             if (data_size) {
 493                 int i;
 494                 fprintf(logfile, "  data: [size=%d]\n", data_size);
 495                 for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
 496                     if (sizeof(tcg_target_ulong) == 8) {
 497                         fprintf(logfile,
 498                                 "0x%08" PRIxPTR ":  .quad  0x%016" TCG_PRIlx "\n",
 499                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
 500                     } else if (sizeof(tcg_target_ulong) == 4) {
 501                         fprintf(logfile,
 502                                 "0x%08" PRIxPTR ":  .long  0x%08" TCG_PRIlx "\n",
 503                                 (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
 504                     } else {
 505                         qemu_build_not_reached();
 506                     }
 507                 }
 508             }
 509             fprintf(logfile, "\n");
 510             qemu_log_unlock(logfile);
 511         }
 512     }
 513
 514     qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
 515         ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
 516                  CODE_GEN_ALIGN));
 517
 518     /* init jump list */
 519     qemu_spin_init(&tb->jmp_lock);
 520     tb->jmp_list_head = (uintptr_t)NULL;
 521     tb->jmp_list_next[0] = (uintptr_t)NULL;
 522     tb->jmp_list_next[1] = (uintptr_t)NULL;
 523     tb->jmp_dest[0] = (uintptr_t)NULL;
 524     tb->jmp_dest[1] = (uintptr_t)NULL;
 525
 526     /* init original jump addresses which have been set during tcg_gen_code() */
 527     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
 528         tb_reset_jump(tb, 0);
 529     }
 530     if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
 531         tb_reset_jump(tb, 1);
 532     }
 533
 534     /*
 535      * If the TB is not associated with a physical RAM page then it must be
 536      * a temporary one-insn TB, and we have nothing left to do. Return early
 537      * before attempting to link to other TBs or add to the lookup table.
 538      */
 539     if (tb_page_addr0(tb) == -1) {
 540         return tb;
 541     }
 542
 543     /*
 544      * Insert TB into the corresponding region tree before publishing it
 545      * through QHT. Otherwise rewinding happened in the TB might fail to
 546      * lookup itself using host PC.
 547      */
 548     tcg_tb_insert(tb);
 549
 550     /*
 551      * No explicit memory barrier is required -- tb_link_page() makes the
 552      * TB visible in a consistent state.
 553      */
 554     existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
 555     /* if the TB already exists, discard what we just translated */
 556     if (unlikely(existing_tb != tb)) {
 557         uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
 558
 559         orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
 560         qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
 561         tcg_tb_remove(tb);
 562         return existing_tb;
 563     }
 564     return tb;
 565 }
 566
 567 /* user-mode: call with mmap_lock held */
 568 void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
 569 {
 570     TranslationBlock *tb;
 571
 572     assert_memory_lock();
 573
 574     tb = tcg_tb_lookup(retaddr);
 575     if (tb) {
 576         /* We can use retranslation to find the PC.  */
 577         cpu_restore_state_from_tb(cpu, tb, retaddr);
 578         tb_phys_invalidate(tb, -1);
 579     } else {
 580         /* The exception probably happened in a helper.  The CPU state should
 581            have been saved before calling it. Fetch the PC from there.  */
 582         CPUArchState *env = cpu->env_ptr;
 583         target_ulong pc, cs_base;
 584         tb_page_addr_t addr;
 585         uint32_t flags;
 586
 587         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
 588         addr = get_page_addr_code(env, pc);
 589         if (addr != -1) {
 590             tb_invalidate_phys_range(addr, addr);
 591         }
 592     }
 593 }
 594
 595 #ifndef CONFIG_USER_ONLY
 596 /*
 597  * In deterministic execution mode, instructions doing device I/Os
 598  * must be at the end of the TB.
 599  *
 600  * Called by softmmu_template.h, with iothread mutex not held.
 601  */
 602 void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
 603 {
 604     TranslationBlock *tb;
 605     CPUClass *cc;
 606     uint32_t n;
 607
 608     tb = tcg_tb_lookup(retaddr);
 609     if (!tb) {
 610         cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
 611                   (void *)retaddr);
 612     }
 613     cpu_restore_state_from_tb(cpu, tb, retaddr);
 614
 615     /*
 616      * Some guests must re-execute the branch when re-executing a delay
 617      * slot instruction.  When this is the case, adjust icount and N
 618      * to account for the re-execution of the branch.
 619      */
 620     n = 1;
 621     cc = CPU_GET_CLASS(cpu);
 622     if (cc->tcg_ops->io_recompile_replay_branch &&
 623         cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
 624         cpu_neg(cpu)->icount_decr.u16.low++;
 625         n = 2;
 626     }
 627
 628     /*
 629      * Exit the loop and potentially generate a new TB executing the
 630      * just the I/O insns. We also limit instrumentation to memory
 631      * operations only (which execute after completion) so we don't
 632      * double instrument the instruction.
 633      */
 634     cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
 635
 636     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
 637         target_ulong pc = log_pc(cpu, tb);
 638         if (qemu_log_in_addr_range(pc)) {
 639             qemu_log("cpu_io_recompile: rewound execution of TB to "
 640                      TARGET_FMT_lx "\n", pc);
 641         }
 642     }
 643
 644     cpu_loop_exit_noexc(cpu);
 645 }
 646
 647 static void print_qht_statistics(struct qht_stats hst, GString *buf)
 648 {
 649     uint32_t hgram_opts;
 650     size_t hgram_bins;
 651     char *hgram;
 652
 653     if (!hst.head_buckets) {
 654         return;
 655     }
 656     g_string_append_printf(buf, "TB hash buckets     %zu/%zu "
 657                            "(%0.2f%% head buckets used)\n",
 658                            hst.used_head_buckets, hst.head_buckets,
 659                            (double)hst.used_head_buckets /
 660                            hst.head_buckets * 100);
 661
 662     hgram_opts =  QDIST_PR_BORDER | QDIST_PR_LABELS;
 663     hgram_opts |= QDIST_PR_100X   | QDIST_PR_PERCENT;
 664     if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
 665         hgram_opts |= QDIST_PR_NODECIMAL;
 666     }
 667     hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
 668     g_string_append_printf(buf, "TB hash occupancy   %0.2f%% avg chain occ. "
 669                            "Histogram: %s\n",
 670                            qdist_avg(&hst.occupancy) * 100, hgram);
 671     g_free(hgram);
 672
 673     hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
 674     hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
 675     if (hgram_bins > 10) {
 676         hgram_bins = 10;
 677     } else {
 678         hgram_bins = 0;
 679         hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
 680     }
 681     hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
 682     g_string_append_printf(buf, "TB hash avg chain   %0.3f buckets. "
 683                            "Histogram: %s\n",
 684                            qdist_avg(&hst.chain), hgram);
 685     g_free(hgram);
 686 }
 687
/* Totals accumulated over all TBs by tb_tree_stats_iter(). */
struct tb_tree_stats {
    size_t nb_tbs;              /* number of TBs visited */
    size_t host_size;           /* sum of tb->tc.size */
    size_t target_size;         /* sum of tb->size */
    size_t max_target_size;     /* largest tb->size seen */
    size_t direct_jmp_count;    /* TBs whose jmp_reset_offset[0] is valid */
    size_t direct_jmp2_count;   /* TBs with both jmp_reset_offset[] valid */
    size_t cross_page;          /* TBs with a second page address set */
};
 697
 698 static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
 699 {
 700     const TranslationBlock *tb = value;
 701     struct tb_tree_stats *tst = data;
 702
 703     tst->nb_tbs++;
 704     tst->host_size += tb->tc.size;
 705     tst->target_size += tb->size;
 706     if (tb->size > tst->max_target_size) {
 707         tst->max_target_size = tb->size;
 708     }
 709     if (tb_page_addr1(tb) != -1) {
 710         tst->cross_page++;
 711     }
 712     if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
 713         tst->direct_jmp_count++;
 714         if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
 715             tst->direct_jmp2_count++;
 716         }
 717     }
 718     return false;
 719 }
 720
 721 void dump_exec_info(GString *buf)
 722 {
 723     struct tb_tree_stats tst = {};
 724     struct qht_stats hst;
 725     size_t nb_tbs, flush_full, flush_part, flush_elide;
 726
 727     tcg_tb_foreach(tb_tree_stats_iter, &tst);
 728     nb_tbs = tst.nb_tbs;
 729     /* XXX: avoid using doubles ? */
 730     g_string_append_printf(buf, "Translation buffer state:\n");
 731     /*
 732      * Report total code size including the padding and TB structs;
 733      * otherwise users might think "-accel tcg,tb-size" is not honoured.
 734      * For avg host size we use the precise numbers from tb_tree_stats though.
 735      */
 736     g_string_append_printf(buf, "gen code size       %zu/%zu\n",
 737                            tcg_code_size(), tcg_code_capacity());
 738     g_string_append_printf(buf, "TB count            %zu\n", nb_tbs);
 739     g_string_append_printf(buf, "TB avg target size  %zu max=%zu bytes\n",
 740                            nb_tbs ? tst.target_size / nb_tbs : 0,
 741                            tst.max_target_size);
 742     g_string_append_printf(buf, "TB avg host size    %zu bytes "
 743                            "(expansion ratio: %0.1f)\n",
 744                            nb_tbs ? tst.host_size / nb_tbs : 0,
 745                            tst.target_size ?
 746                            (double)tst.host_size / tst.target_size : 0);
 747     g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
 748                            tst.cross_page,
 749                            nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
 750     g_string_append_printf(buf, "direct jump count   %zu (%zu%%) "
 751                            "(2 jumps=%zu %zu%%)\n",
 752                            tst.direct_jmp_count,
 753                            nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
 754                            tst.direct_jmp2_count,
 755                            nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);
 756
 757     qht_statistics_init(&tb_ctx.htable, &hst);
 758     print_qht_statistics(hst, buf);
 759     qht_statistics_destroy(&hst);
 760
 761     g_string_append_printf(buf, "\nStatistics:\n");
 762     g_string_append_printf(buf, "TB flush count      %u\n",
 763                            qatomic_read(&tb_ctx.tb_flush_count));
 764     g_string_append_printf(buf, "TB invalidate count %u\n",
 765                            qatomic_read(&tb_ctx.tb_phys_invalidate_count));
 766
 767     tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
 768     g_string_append_printf(buf, "TLB full flushes    %zu\n", flush_full);
 769     g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
 770     g_string_append_printf(buf, "TLB elided flushes  %zu\n", flush_elide);
 771     tcg_dump_info(buf);
 772 }
 773
 774 #else /* CONFIG_USER_ONLY */
 775
 776 void cpu_interrupt(CPUState *cpu, int mask)
 777 {
 778     g_assert(qemu_mutex_iothread_locked());
 779     cpu->interrupt_request |= mask;
 780     qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
 781 }
 782
 783 #endif /* CONFIG_USER_ONLY */
 784
 785 /*
 786  * Called by generic code at e.g. cpu reset after cpu creation,
 787  * therefore we must be prepared to allocate the jump cache.
 788  */
 789 void tcg_flush_jmp_cache(CPUState *cpu)
 790 {
 791     CPUJumpCache *jc = cpu->tb_jmp_cache;
 792
 793     /* During early initialization, the cache may not yet be allocated. */
 794     if (unlikely(jc == NULL)) {
 795         return;
 796     }
 797
 798     for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
 799         qatomic_set(&jc->array[i].tb, NULL);
 800     }
 801 }
 802
 803 /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
 804 void tcg_flush_softmmu_tlb(CPUState *cs)
 805 {
 806 #ifdef CONFIG_SOFTMMU
 807     tlb_flush(cs);
 808 #endif
 809 }