accel/tcg/cpu-exec.c

   1 /*
   2  *  emulator main execution loop
   3  *
   4  *  Copyright (c) 2003-2005 Fabrice Bellard
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "qemu/qemu-print.h"
  22 #include "qapi/error.h"
  23 #include "qapi/qapi-commands-machine.h"
  24 #include "qapi/type-helpers.h"
  25 #include "hw/core/tcg-cpu-ops.h"
  26 #include "trace.h"
  27 #include "disas/disas.h"
  28 #include "exec/exec-all.h"
  29 #include "tcg/tcg.h"
  30 #include "qemu/atomic.h"
  31 #include "qemu/compiler.h"
  32 #include "qemu/timer.h"
  33 #include "qemu/rcu.h"
  34 #include "exec/log.h"
  35 #include "qemu/main-loop.h"
  36 #if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
  37 #include "hw/i386/apic.h"
  38 #endif
  39 #include "sysemu/cpus.h"
  40 #include "exec/cpu-all.h"
  41 #include "sysemu/cpu-timers.h"
  42 #include "sysemu/replay.h"
  43 #include "sysemu/tcg.h"
  44 #include "exec/helper-proto.h"
  45 #include "tb-jmp-cache.h"
  46 #include "tb-hash.h"
  47 #include "tb-context.h"
  48 #include "internal.h"
  49
  50 /* -icount align implementation. */
  51
  52 typedef struct SyncClocks {
  53     int64_t diff_clk;
  54     int64_t last_cpu_icount;
  55     int64_t realtime_clock;
  56 } SyncClocks;
  57
  58 #if !defined(CONFIG_USER_ONLY)
  59 /* Allow the guest to have a max 3ms advance.
  60  * The difference between the 2 clocks could therefore
  61  * oscillate around 0.
  62  */
     /* Compared against SyncClocks.diff_clk (nanoseconds) in align_clocks(). */
  63 #define VM_CLOCK_ADVANCE 3000000
     /*
      * print_delay() emits a new message when the lateness, in seconds, drops
      * more than this far below the last reported threshold.
      */
  64 #define THRESHOLD_REDUCE 1.5
     /* Minimum realtime_clock delta (ns) between two messages from print_delay(). */
  65 #define MAX_DELAY_PRINT_RATE 2000000000LL
     /* Upper bound on the number of messages print_delay() will ever emit. */
  66 #define MAX_NB_PRINTS 100
  67
     /* Smallest and largest diff_clk values observed by init_delay_params(). */
  68 static int64_t max_delay;
  69 static int64_t max_advance;
  70
  71 static void align_clocks(SyncClocks *sc, CPUState *cpu)
  72 {
  73     int64_t cpu_icount;
  74
  75     if (!icount_align_option) {
  76         return;
  77     }
  78
  79     cpu_icount = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
  80     sc->diff_clk += icount_to_ns(sc->last_cpu_icount - cpu_icount);
  81     sc->last_cpu_icount = cpu_icount;
  82
  83     if (sc->diff_clk > VM_CLOCK_ADVANCE) {
  84 #ifndef _WIN32
  85         struct timespec sleep_delay, rem_delay;
  86         sleep_delay.tv_sec = sc->diff_clk / 1000000000LL;
  87         sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL;
  88         if (nanosleep(&sleep_delay, &rem_delay) < 0) {
  89             sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec;
  90         } else {
  91             sc->diff_clk = 0;
  92         }
  93 #else
  94         Sleep(sc->diff_clk / SCALE_MS);
  95         sc->diff_clk = 0;
  96 #endif
  97     }
  98 }
  99
 100 static void print_delay(const SyncClocks *sc)
 101 {
 102     static float threshold_delay;
 103     static int64_t last_realtime_clock;
 104     static int nb_prints;
 105
 106     if (icount_align_option &&
 107         sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE &&
 108         nb_prints < MAX_NB_PRINTS) {
 109         if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) ||
 110             (-sc->diff_clk / (float)1000000000LL <
 111              (threshold_delay - THRESHOLD_REDUCE))) {
 112             threshold_delay = (-sc->diff_clk / 1000000000LL) + 1;
 113             qemu_printf("Warning: The guest is now late by %.1f to %.1f seconds\n",
 114                         threshold_delay - 1,
 115                         threshold_delay);
 116             nb_prints++;
 117             last_realtime_clock = sc->realtime_clock;
 118         }
 119     }
 120 }
 121
 122 static void init_delay_params(SyncClocks *sc, CPUState *cpu)
 123 {
 124     if (!icount_align_option) {
 125         return;
 126     }
 127     sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
 128     sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
 129     sc->last_cpu_icount
 130         = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low;
 131     if (sc->diff_clk < max_delay) {
 132         max_delay = sc->diff_clk;
 133     }
 134     if (sc->diff_clk > max_advance) {
 135         max_advance = sc->diff_clk;
 136     }
 137
 138     /* Print every 2s max if the guest is late. We limit the number
 139        of printed messages to NB_PRINT_MAX(currently 100) */
 140     print_delay(sc);
 141 }
 142 #else
 143 static void align_clocks(SyncClocks *sc, const CPUState *cpu)
 144 {
 145 }
 146
 147 static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
 148 {
 149 }
 150 #endif /* CONFIG USER ONLY */
 151
 152 uint32_t curr_cflags(CPUState *cpu)
 153 {
 154     uint32_t cflags = cpu->tcg_cflags;
 155
 156     /*
 157      * Record gdb single-step.  We should be exiting the TB by raising
 158      * EXCP_DEBUG, but to simplify other tests, disable chaining too.
 159      *
 160      * For singlestep and -d nochain, suppress goto_tb so that
 161      * we can log -d cpu,exec after every TB.
 162      */
 163     if (unlikely(cpu->singlestep_enabled)) {
 164         cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1;
 165     } else if (singlestep) {
 166         cflags |= CF_NO_GOTO_TB | 1;
 167     } else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
 168         cflags |= CF_NO_GOTO_TB;
 169     }
 170
 171     return cflags;
 172 }
 173
     /*
      * Lookup key handed to tb_lookup_cmp() through qht_lookup_custom().
      * tb_htable_lookup() fills it in from the current CPU state; the
      * comparator matches each field against a candidate TranslationBlock.
      */
 174 struct tb_desc {
     /* pc being looked up; ignored by the comparator when TARGET_TB_PCREL */
 175     target_ulong pc;
 176     target_ulong cs_base;
     /* used by the comparator to translate the second page of a TB */
 177     CPUArchState *env;
     /* result of get_page_addr_code() for pc */
 178     tb_page_addr_t page_addr0;
 179     uint32_t flags;
 180     uint32_t cflags;
     /* copy of *cpu->trace_dstate at lookup time */
 181     uint32_t trace_vcpu_dstate;
 182 };
 183
 184 static bool tb_lookup_cmp(const void *p, const void *d)
 185 {
 186     const TranslationBlock *tb = p;
 187     const struct tb_desc *desc = d;
 188
 189     if ((TARGET_TB_PCREL || tb_pc(tb) == desc->pc) &&
 190         tb_page_addr0(tb) == desc->page_addr0 &&
 191         tb->cs_base == desc->cs_base &&
 192         tb->flags == desc->flags &&
 193         tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
 194         tb_cflags(tb) == desc->cflags) {
 195         /* check next page if needed */
 196         tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
 197         if (tb_phys_page1 == -1) {
 198             return true;
 199         } else {
 200             tb_page_addr_t phys_page1;
 201             target_ulong virt_page1;
 202
 203             /*
 204              * We know that the first page matched, and an otherwise valid TB
 205              * encountered an incomplete instruction at the end of that page,
 206              * therefore we know that generating a new TB from the current PC
 207              * must also require reading from the next page -- even if the
 208              * second pages do not match, and therefore the resulting insn
 209              * is different for the new TB.  Therefore any exception raised
 210              * here by the faulting lookup is not premature.
 211              */
 212             virt_page1 = TARGET_PAGE_ALIGN(desc->pc);
 213             phys_page1 = get_page_addr_code(desc->env, virt_page1);
 214             if (tb_phys_page1 == phys_page1) {
 215                 return true;
 216             }
 217         }
 218     }
 219     return false;
 220 }
 221
 222 static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
 223                                           target_ulong cs_base, uint32_t flags,
 224                                           uint32_t cflags)
 225 {
 226     tb_page_addr_t phys_pc;
 227     struct tb_desc desc;
 228     uint32_t h;
 229
 230     desc.env = cpu->env_ptr;
 231     desc.cs_base = cs_base;
 232     desc.flags = flags;
 233     desc.cflags = cflags;
 234     desc.trace_vcpu_dstate = *cpu->trace_dstate;
 235     desc.pc = pc;
 236     phys_pc = get_page_addr_code(desc.env, pc);
 237     if (phys_pc == -1) {
 238         return NULL;
 239     }
 240     desc.page_addr0 = phys_pc;
 241     h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : pc),
 242                      flags, cflags, *cpu->trace_dstate);
 243     return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
 244 }
 245
 246 /* Might cause an exception, so have a longjmp destination ready */
 247 static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
 248                                           target_ulong cs_base,
 249                                           uint32_t flags, uint32_t cflags)
 250 {
 251     TranslationBlock *tb;
 252     CPUJumpCache *jc;
 253     uint32_t hash;
 254
 255     /* we should never be trying to look up an INVALID tb */
 256     tcg_debug_assert(!(cflags & CF_INVALID));
 257
 258     hash = tb_jmp_cache_hash_func(pc);
 259     jc = cpu->tb_jmp_cache;
 260     tb = tb_jmp_cache_get_tb(jc, hash);
 261
 262     if (likely(tb &&
 263                tb_jmp_cache_get_pc(jc, hash, tb) == pc &&
 264                tb->cs_base == cs_base &&
 265                tb->flags == flags &&
 266                tb->trace_vcpu_dstate == *cpu->trace_dstate &&
 267                tb_cflags(tb) == cflags)) {
 268         return tb;
 269     }
 270     tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
 271     if (tb == NULL) {
 272         return NULL;
 273     }
 274     tb_jmp_cache_set(jc, hash, tb, pc);
 275     return tb;
 276 }
 277
 278 static void log_cpu_exec(target_ulong pc, CPUState *cpu,
 279                          const TranslationBlock *tb)
 280 {
 281     if (qemu_log_in_addr_range(pc)) {
 282         qemu_log_mask(CPU_LOG_EXEC,
 283                       "Trace %d: %p [" TARGET_FMT_lx
 284                       "/" TARGET_FMT_lx "/%08x/%08x] %s\n",
 285                       cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
 286                       tb->flags, tb->cflags, lookup_symbol(pc));
 287
 288 #if defined(DEBUG_DISAS)
 289         if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
 290             FILE *logfile = qemu_log_trylock();
 291             if (logfile) {
 292                 int flags = 0;
 293
 294                 if (qemu_loglevel_mask(CPU_LOG_TB_FPU)) {
 295                     flags |= CPU_DUMP_FPU;
 296                 }
 297 #if defined(TARGET_I386)
 298                 flags |= CPU_DUMP_CCOP;
 299 #endif
 300                 cpu_dump_state(cpu, logfile, flags);
 301                 qemu_log_unlock(logfile);
 302             }
 303         }
 304 #endif /* DEBUG_DISAS */
 305     }
 306 }
 307
 308 static bool check_for_breakpoints_slow(CPUState *cpu, target_ulong pc,
 309                                        uint32_t *cflags)
 310 {
 311     CPUBreakpoint *bp;
 312     bool match_page = false;
 313
 314     /*
 315      * Singlestep overrides breakpoints.
 316      * This requirement is visible in the record-replay tests, where
 317      * we would fail to make forward progress in reverse-continue.
 318      *
 319      * TODO: gdb singlestep should only override gdb breakpoints,
 320      * so that one could (gdb) singlestep into the guest kernel's
 321      * architectural breakpoint handler.
 322      */
 323     if (cpu->singlestep_enabled) {
 324         return false;
 325     }
 326
 327     QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
 328         /*
 329          * If we have an exact pc match, trigger the breakpoint.
 330          * Otherwise, note matches within the page.
 331          */
 332         if (pc == bp->pc) {
 333             bool match_bp = false;
 334
 335             if (bp->flags & BP_GDB) {
 336                 match_bp = true;
 337             } else if (bp->flags & BP_CPU) {
 338 #ifdef CONFIG_USER_ONLY
 339                 g_assert_not_reached();
 340 #else
 341                 CPUClass *cc = CPU_GET_CLASS(cpu);
 342                 assert(cc->tcg_ops->debug_check_breakpoint);
 343                 match_bp = cc->tcg_ops->debug_check_breakpoint(cpu);
 344 #endif
 345             }
 346
 347             if (match_bp) {
 348                 cpu->exception_index = EXCP_DEBUG;
 349                 return true;
 350             }
 351         } else if (((pc ^ bp->pc) & TARGET_PAGE_MASK) == 0) {
 352             match_page = true;
 353         }
 354     }
 355
 356     /*
 357      * Within the same page as a breakpoint, single-step,
 358      * returning to helper_lookup_tb_ptr after each insn looking
 359      * for the actual breakpoint.
 360      *
 361      * TODO: Perhaps better to record all of the TBs associated
 362      * with a given virtual page that contains a breakpoint, and
 363      * then invalidate them when a new overlapping breakpoint is
 364      * set on the page.  Non-overlapping TBs would not be
 365      * invalidated, nor would any TB need to be invalidated as
 366      * breakpoints are removed.
 367      */
 368     if (match_page) {
 369         *cflags = (*cflags & ~CF_COUNT_MASK) | CF_NO_GOTO_TB | 1;
 370     }
 371     return false;
 372 }
 373
 374 static inline bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
 375                                          uint32_t *cflags)
 376 {
 377     return unlikely(!QTAILQ_EMPTY(&cpu->breakpoints)) &&
 378         check_for_breakpoints_slow(cpu, pc, cflags);
 379 }
 380
 381 /**
 382  * helper_lookup_tb_ptr: quick check for next tb
 383  * @env: current cpu state
 384  *
 385  * Look for an existing TB matching the current cpu state.
 386  * If found, return the code pointer.  If not found, return
 387  * the tcg epilogue so that we return into cpu_tb_exec.
 388  */
 389 const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
 390 {
 391     CPUState *cpu = env_cpu(env);
 392     TranslationBlock *tb;
 393     target_ulong cs_base, pc;
 394     uint32_t flags, cflags;
 395
 396     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
 397
 398     cflags = curr_cflags(cpu);
 399     if (check_for_breakpoints(cpu, pc, &cflags)) {
 400         cpu_loop_exit(cpu);
 401     }
 402
 403     tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
 404     if (tb == NULL) {
 405         return tcg_code_gen_epilogue;
 406     }
 407
 408     if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
 409         log_cpu_exec(pc, cpu, tb);
 410     }
 411
 412     return tb->tc.ptr;
 413 }
 414
 415 /* Execute a TB, and fix up the CPU state afterwards if necessary */
 416 /*
 417  * Disable CFI checks.
 418  * TCG creates binary blobs at runtime, with the transformed code.
 419  * A TB is a blob of binary code, created at runtime and called with an
 420  * indirect function call. Since such function did not exist at compile time,
 421  * the CFI runtime has no way to verify its signature and would fail.
 422  * TCG is not considered a security-sensitive part of QEMU so this does not
 423  * affect the impact of CFI in environment with high security requirements
 424  */
 425 static inline TranslationBlock * QEMU_DISABLE_CFI
 426 cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
 427 {
 428     CPUArchState *env = cpu->env_ptr;
 429     uintptr_t ret;
 430     TranslationBlock *last_tb;
 431     const void *tb_ptr = itb->tc.ptr;
 432
 433     if (qemu_loglevel_mask(CPU_LOG_TB_CPU | CPU_LOG_EXEC)) {
 434         log_cpu_exec(log_pc(cpu, itb), cpu, itb);
 435     }
 436
 437     qemu_thread_jit_execute();
 438     ret = tcg_qemu_tb_exec(env, tb_ptr);
 439     cpu->can_do_io = 1;
 440     /*
 441      * TODO: Delay swapping back to the read-write region of the TB
 442      * until we actually need to modify the TB.  The read-only copy,
 443      * coming from the rx region, shares the same host TLB entry as
 444      * the code that executed the exit_tb opcode that arrived here.
 445      * If we insist on touching both the RX and the RW pages, we
 446      * double the host TLB pressure.
 447      */
 448     last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK));
 449     *tb_exit = ret & TB_EXIT_MASK;
 450
 451     trace_exec_tb_exit(last_tb, *tb_exit);
 452
 453     if (*tb_exit > TB_EXIT_IDX1) {
 454         /* We didn't start executing this TB (eg because the instruction
 455          * counter hit zero); we must restore the guest PC to the address
 456          * of the start of the TB.
 457          */
 458         CPUClass *cc = CPU_GET_CLASS(cpu);
 459
 460         if (cc->tcg_ops->synchronize_from_tb) {
 461             cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
 462         } else {
 463             assert(!TARGET_TB_PCREL);
 464             assert(cc->set_pc);
 465             cc->set_pc(cpu, tb_pc(last_tb));
 466         }
 467         if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
 468             target_ulong pc = log_pc(cpu, last_tb);
 469             if (qemu_log_in_addr_range(pc)) {
 470                 qemu_log("Stopped execution of TB chain before %p ["
 471                          TARGET_FMT_lx "] %s\n",
 472                          last_tb->tc.ptr, pc, lookup_symbol(pc));
 473             }
 474         }
 475     }
 476
 477     /*
 478      * If gdb single-step, and we haven't raised another exception,
 479      * raise a debug exception.  Single-step with another exception
 480      * is handled in cpu_handle_exception.
 481      */
 482     if (unlikely(cpu->singlestep_enabled) && cpu->exception_index == -1) {
 483         cpu->exception_index = EXCP_DEBUG;
 484         cpu_loop_exit(cpu);
 485     }
 486
 487     return last_tb;
 488 }
 489
 490
 491 static void cpu_exec_enter(CPUState *cpu)
 492 {
 493     CPUClass *cc = CPU_GET_CLASS(cpu);
 494
 495     if (cc->tcg_ops->cpu_exec_enter) {
 496         cc->tcg_ops->cpu_exec_enter(cpu);
 497     }
 498 }
 499
 500 static void cpu_exec_exit(CPUState *cpu)
 501 {
 502     CPUClass *cc = CPU_GET_CLASS(cpu);
 503
 504     if (cc->tcg_ops->cpu_exec_exit) {
 505         cc->tcg_ops->cpu_exec_exit(cpu);
 506     }
 507     QEMU_PLUGIN_ASSERT(cpu->plugin_mem_cbs == NULL);
 508 }
 509
     /*
      * cpu_exec_step_atomic: execute a single guest instruction on @cpu while
      * holding the exclusive section (start_exclusive()/end_exclusive()).
      *
      * The TB is looked up, or generated if missing, with CF_PARALLEL cleared
      * and an instruction count of 1.  The else branch of the sigsetjmp() runs
      * when control comes back through cpu->jmp_env; it releases whatever
      * locks are still held at that point.
      */
 510 void cpu_exec_step_atomic(CPUState *cpu)
 511 {
 512     CPUArchState *env = cpu->env_ptr;
 513     TranslationBlock *tb;
 514     target_ulong cs_base, pc;
 515     uint32_t flags, cflags;
 516     int tb_exit;
 517
 518     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
 519         start_exclusive();
 520         g_assert(cpu == current_cpu);
 521         g_assert(!cpu->running);
 522         cpu->running = true;
 523
 524         cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
 525
 526         cflags = curr_cflags(cpu);
 527         /* Execute in a serial context. */
 528         cflags &= ~CF_PARALLEL;
 529         /* After 1 insn, return and release the exclusive lock. */
 530         cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | 1;
 531         /*
 532          * No need to check_for_breakpoints here.
 533          * We only arrive in cpu_exec_step_atomic after beginning execution
 534          * of an insn that includes an atomic operation we can't handle.
 535          * Any breakpoint for this insn will have been recognized earlier.
 536          */
 537
 538         tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
 539         if (tb == NULL) {
             /* No cached translation: generate one under the mmap lock. */
 540             mmap_lock();
 541             tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
 542             mmap_unlock();
 543         }
 544
 545         cpu_exec_enter(cpu);
 546         /* execute the generated code */
 547         trace_exec_tb(tb, pc);
 548         cpu_tb_exec(cpu, tb, &tb_exit);
 549         cpu_exec_exit(cpu);
 550     } else {
             /* Reached via cpu->jmp_env: undo state left by the aborted path. */
 551 #ifndef CONFIG_SOFTMMU
 552         clear_helper_retaddr();
 553         if (have_mmap_lock()) {
 554             mmap_unlock();
 555         }
 556 #endif
 557         if (qemu_mutex_iothread_locked()) {
 558             qemu_mutex_unlock_iothread();
 559         }
 560         assert_no_pages_locked();
 561         qemu_plugin_disable_mem_helpers(cpu);
 562     }
 563
 564     /*
 565      * As we start the exclusive region before codegen we must still
 566      * be in the region if we longjump out of either the codegen or
 567      * the execution.
 568      */
 569     g_assert(cpu_in_exclusive_context(cpu));
 570     cpu->running = false;
 571     end_exclusive();
 572 }
 573
 574 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
 575 {
 576     /*
 577      * Get the rx view of the structure, from which we find the
 578      * executable code address, and tb_target_set_jmp_target can
 579      * produce a pc-relative displacement to jmp_target_addr[n].
 580      */
 581     const TranslationBlock *c_tb = tcg_splitwx_to_rx(tb);
 582     uintptr_t offset = tb->jmp_insn_offset[n];
 583     uintptr_t jmp_rx = (uintptr_t)tb->tc.ptr + offset;
 584     uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff;
 585
 586     tb->jmp_target_addr[n] = addr;
 587     tb_target_set_jmp_target(c_tb, n, jmp_rx, jmp_rw);
 588 }
 589
 590 static inline void tb_add_jump(TranslationBlock *tb, int n,
 591                                TranslationBlock *tb_next)
 592 {
 593     uintptr_t old;
 594
 595     qemu_thread_jit_write();
 596     assert(n < ARRAY_SIZE(tb->jmp_list_next));
 597     qemu_spin_lock(&tb_next->jmp_lock);
 598
 599     /* make sure the destination TB is valid */
 600     if (tb_next->cflags & CF_INVALID) {
 601         goto out_unlock_next;
 602     }
 603     /* Atomically claim the jump destination slot only if it was NULL */
 604     old = qatomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL,
 605                           (uintptr_t)tb_next);
 606     if (old) {
 607         goto out_unlock_next;
 608     }
 609
 610     /* patch the native jump address */
 611     tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);
 612
 613     /* add in TB jmp list */
 614     tb->jmp_list_next[n] = tb_next->jmp_list_head;
 615     tb_next->jmp_list_head = (uintptr_t)tb | n;
 616
 617     qemu_spin_unlock(&tb_next->jmp_lock);
 618
 619     qemu_log_mask(CPU_LOG_EXEC, "Linking TBs %p index %d -> %p\n",
 620                   tb->tc.ptr, n, tb_next->tc.ptr);
 621     return;
 622
 623  out_unlock_next:
 624     qemu_spin_unlock(&tb_next->jmp_lock);
 625     return;
 626 }
 627
 628 static inline bool cpu_handle_halt(CPUState *cpu)
 629 {
 630 #ifndef CONFIG_USER_ONLY
 631     if (cpu->halted) {
 632 #if defined(TARGET_I386)
 633         if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
 634             X86CPU *x86_cpu = X86_CPU(cpu);
 635             qemu_mutex_lock_iothread();
 636             apic_poll_irq(x86_cpu->apic_state);
 637             cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
 638             qemu_mutex_unlock_iothread();
 639         }
 640 #endif /* TARGET_I386 */
 641         if (!cpu_has_work(cpu)) {
 642             return true;
 643         }
 644
 645         cpu->halted = 0;
 646     }
 647 #endif /* !CONFIG_USER_ONLY */
 648
 649     return false;
 650 }
 651
 652 static inline void cpu_handle_debug_exception(CPUState *cpu)
 653 {
 654     CPUClass *cc = CPU_GET_CLASS(cpu);
 655     CPUWatchpoint *wp;
 656
 657     if (!cpu->watchpoint_hit) {
 658         QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
 659             wp->flags &= ~BP_WATCHPOINT_HIT;
 660         }
 661     }
 662
 663     if (cc->tcg_ops->debug_excp_handler) {
 664         cc->tcg_ops->debug_excp_handler(cpu);
 665     }
 666 }
 667
     /*
      * cpu_handle_exception: process cpu->exception_index, if one is pending.
      * @cpu: vCPU being run
      * @ret: out; written on every path that returns true
      *
      * Returns true when the execution loop must stop, with *ret set; false
      * when execution may carry on (nothing pending, or the exception has
      * been delivered to the guest through the target's do_interrupt hook).
      */
 668 static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
 669 {
 670     if (cpu->exception_index < 0) {
 671 #ifndef CONFIG_USER_ONLY
 672         if (replay_has_exception()
 673             && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) {
 674             /* Execute just one insn to trigger exception pending in the log */
 675             cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT)
 676                 | CF_NOIRQ | 1;
 677         }
 678 #endif
 679         return false;
 680     }
 681     if (cpu->exception_index >= EXCP_INTERRUPT) {
 682         /* exit request from the cpu execution loop */
 683         *ret = cpu->exception_index;
 684         if (*ret == EXCP_DEBUG) {
 685             cpu_handle_debug_exception(cpu);
 686         }
 687         cpu->exception_index = -1;
 688         return true;
 689     } else {
 690 #if defined(CONFIG_USER_ONLY)
 691         /* if user mode only, we simulate a fake exception
 692            which will be handled outside the cpu execution
 693            loop */
 694 #if defined(TARGET_I386)
 695         CPUClass *cc = CPU_GET_CLASS(cpu);
 696         cc->tcg_ops->fake_user_interrupt(cpu);
 697 #endif /* TARGET_I386 */
 698         *ret = cpu->exception_index;
 699         cpu->exception_index = -1;
 700         return true;
 701 #else
             /* Deliver the exception via the target hook, under the iothread lock. */
 702         if (replay_exception()) {
 703             CPUClass *cc = CPU_GET_CLASS(cpu);
 704             qemu_mutex_lock_iothread();
 705             cc->tcg_ops->do_interrupt(cpu);
 706             qemu_mutex_unlock_iothread();
 707             cpu->exception_index = -1;
 708
 709             if (unlikely(cpu->singlestep_enabled)) {
 710                 /*
 711                  * After processing the exception, ensure an EXCP_DEBUG is
 712                  * raised when single-stepping so that GDB doesn't miss the
 713                  * next instruction.
 714                  */
 715                 *ret = EXCP_DEBUG;
 716                 cpu_handle_debug_exception(cpu);
 717                 return true;
 718             }
 719         } else if (!replay_has_interrupt()) {
 720             /* give a chance to iothread in replay mode */
 721             *ret = EXCP_INTERRUPT;
 722             return true;
 723         }
 724 #endif
 725     }
 726
 727     return false;
 728 }
 729
 730 #ifndef CONFIG_USER_ONLY
 731 /*
 732  * CPU_INTERRUPT_POLL is a virtual event which gets converted into a
 733  * "real" interrupt event later. It does not need to be recorded for
 734  * replay purposes.
 735  */
 736 static inline bool need_replay_interrupt(int interrupt_request)
 737 {
 738 #if defined(TARGET_I386)
 739     return !(interrupt_request & CPU_INTERRUPT_POLL);
 740 #else
 741     return true;
 742 #endif
 743 }
 744 #endif /* !CONFIG_USER_ONLY */
 745
     /*
      * cpu_handle_interrupt: service pending interrupt and exit requests.
      * @cpu: vCPU being run
      * @last_tb: in/out; set to NULL whenever program flow changed so that
      *           the previously executed TB must not be chained
      *
      * Returns true when an event was handled that requires leaving the TB
      * execution path (most such paths set cpu->exception_index first);
      * returns false when execution can simply continue.
      */
 746 static inline bool cpu_handle_interrupt(CPUState *cpu,
 747                                         TranslationBlock **last_tb)
 748 {
 749     /*
 750      * If we have requested custom cflags with CF_NOIRQ we should
 751      * skip checking here. Any pending interrupts will get picked up
 752      * by the next TB we execute under normal cflags.
 753      */
 754     if (cpu->cflags_next_tb != -1 && cpu->cflags_next_tb & CF_NOIRQ) {
 755         return false;
 756     }
 757
 758     /* Clear the interrupt flag now since we're processing
 759      * cpu->interrupt_request and cpu->exit_request.
 760      * Ensure zeroing happens before reading cpu->exit_request or
 761      * cpu->interrupt_request (see also smp_wmb in cpu_exit())
 762      */
 763     qatomic_mb_set(&cpu_neg(cpu)->icount_decr.u16.high, 0);
 764
         /* Pending interrupts are examined and dispatched under the iothread lock. */
 765     if (unlikely(qatomic_read(&cpu->interrupt_request))) {
 766         int interrupt_request;
 767         qemu_mutex_lock_iothread();
 768         interrupt_request = cpu->interrupt_request;
 769         if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
 770             /* Mask out external interrupts for this step. */
 771             interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
 772         }
 773         if (interrupt_request & CPU_INTERRUPT_DEBUG) {
 774             cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
 775             cpu->exception_index = EXCP_DEBUG;
 776             qemu_mutex_unlock_iothread();
 777             return true;
 778         }
 779 #if !defined(CONFIG_USER_ONLY)
             /* While replaying, act only when the log has an interrupt here. */
 780         if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
 781             /* Do nothing */
 782         } else if (interrupt_request & CPU_INTERRUPT_HALT) {
 783             replay_interrupt();
 784             cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
 785             cpu->halted = 1;
 786             cpu->exception_index = EXCP_HLT;
 787             qemu_mutex_unlock_iothread();
 788             return true;
 789         }
 790 #if defined(TARGET_I386)
 791         else if (interrupt_request & CPU_INTERRUPT_INIT) {
 792             X86CPU *x86_cpu = X86_CPU(cpu);
 793             CPUArchState *env = &x86_cpu->env;
 794             replay_interrupt();
 795             cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
 796             do_cpu_init(x86_cpu);
 797             cpu->exception_index = EXCP_HALTED;
 798             qemu_mutex_unlock_iothread();
 799             return true;
 800         }
 801 #else
 802         else if (interrupt_request & CPU_INTERRUPT_RESET) {
 803             replay_interrupt();
 804             cpu_reset(cpu);
 805             qemu_mutex_unlock_iothread();
 806             return true;
 807         }
 808 #endif /* !TARGET_I386 */
 809         /* The target hook has 3 exit conditions:
 810            False when the interrupt isn't processed,
 811            True when it is, and we should restart on a new TB,
 812            and via longjmp via cpu_loop_exit.  */
 813         else {
 814             CPUClass *cc = CPU_GET_CLASS(cpu);
 815
 816             if (cc->tcg_ops->cpu_exec_interrupt &&
 817                 cc->tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
 818                 if (need_replay_interrupt(interrupt_request)) {
 819                     replay_interrupt();
 820                 }
 821                 /*
 822                  * After processing the interrupt, ensure an EXCP_DEBUG is
 823                  * raised when single-stepping so that GDB doesn't miss the
 824                  * next instruction.
 825                  */
 826                 if (unlikely(cpu->singlestep_enabled)) {
 827                     cpu->exception_index = EXCP_DEBUG;
 828                     qemu_mutex_unlock_iothread();
 829                     return true;
 830                 }
 831                 cpu->exception_index = -1;
 832                 *last_tb = NULL;
 833             }
 834             /* The target hook may have updated the 'cpu->interrupt_request';
 835              * reload the 'interrupt_request' value */
 836             interrupt_request = cpu->interrupt_request;
 837         }
 838 #endif /* !CONFIG_USER_ONLY */
 839         if (interrupt_request & CPU_INTERRUPT_EXITTB) {
 840             cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
 841             /* ensure that no TB jump will be modified as
 842                the program flow was changed */
 843             *last_tb = NULL;
 844         }
 845
 846         /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
 847         qemu_mutex_unlock_iothread();
 848     }
 849
 850     /* Finally, check if we need to exit to the main loop.  */
         /*
          * That is the case when an exit was requested, or when icount is
          * enabled, the next TB is going to use icount, and the instruction
          * budget (decrementer low half plus icount_extra) is exhausted.
          */
 851     if (unlikely(qatomic_read(&cpu->exit_request))
 852         || (icount_enabled()
 853             && (cpu->cflags_next_tb == -1 || cpu->cflags_next_tb & CF_USE_ICOUNT)
 854             && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0)) {
 855         qatomic_set(&cpu->exit_request, 0);
 856         if (cpu->exception_index == -1) {
 857             cpu->exception_index = EXCP_INTERRUPT;
 858         }
 859         return true;
 860     }
 861
 862     return false;
 863 }
 864
 865 static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
 866                                     target_ulong pc,
 867                                     TranslationBlock **last_tb, int *tb_exit)
 868 {
 869     int32_t insns_left;
 870
 871     trace_exec_tb(tb, pc);
 872     tb = cpu_tb_exec(cpu, tb, tb_exit);
 873     if (*tb_exit != TB_EXIT_REQUESTED) {
 874         *last_tb = tb;
 875         return;
 876     }
 877
 878     *last_tb = NULL;
 879     insns_left = qatomic_read(&cpu_neg(cpu)->icount_decr.u32);
 880     if (insns_left < 0) {
 881         /* Something asked us to stop executing chained TBs; just
 882          * continue round the main loop. Whatever requested the exit
 883          * will also have set something else (eg exit_request or
 884          * interrupt_request) which will be handled by
 885          * cpu_handle_interrupt.  cpu_handle_interrupt will also
 886          * clear cpu->icount_decr.u16.high.
 887          */
 888         return;
 889     }
 890
 891     /* Instruction counter expired.  */
 892     assert(icount_enabled());
 893 #ifndef CONFIG_USER_ONLY
 894     /* Ensure global icount has gone forward */
 895     icount_update(cpu);
 896     /* Refill decrementer and continue execution.  */
 897     insns_left = MIN(0xffff, cpu->icount_budget);
 898     cpu_neg(cpu)->icount_decr.u16.low = insns_left;
 899     cpu->icount_extra = cpu->icount_budget - insns_left;
 900
 901     /*
 902      * If the next tb has more instructions than we have left to
 903      * execute we need to ensure we find/generate a TB with exactly
 904      * insns_left instructions in it.
 905      */
 906     if (insns_left > 0 && insns_left < tb->icount)  {
 907         assert(insns_left <= CF_COUNT_MASK);
 908         assert(cpu->icount_extra == 0);
 909         cpu->cflags_next_tb = (tb->cflags & ~CF_COUNT_MASK) | insns_left;
 910     }
 911 #endif
 912 }
 913
 914 /* main execution loop */
 915
 916 static int __attribute__((noinline))
 917 cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
 918 {
 919     int ret;
 920
 921     /* if an exception is pending, we execute it here */
 922     while (!cpu_handle_exception(cpu, &ret)) {
 923         TranslationBlock *last_tb = NULL;
 924         int tb_exit = 0;
 925
 926         while (!cpu_handle_interrupt(cpu, &last_tb)) {
 927             TranslationBlock *tb;
 928             target_ulong cs_base, pc;
 929             uint32_t flags, cflags;
 930
 931             cpu_get_tb_cpu_state(cpu->env_ptr, &pc, &cs_base, &flags);
 932
 933             /*
 934              * When requested, use an exact setting for cflags for the next
 935              * execution.  This is used for icount, precise smc, and stop-
 936              * after-access watchpoints.  Since this request should never
 937              * have CF_INVALID set, -1 is a convenient invalid value that
 938              * does not require tcg headers for cpu_common_reset.
 939              */
 940             cflags = cpu->cflags_next_tb;
 941             if (cflags == -1) {
 942                 cflags = curr_cflags(cpu);
 943             } else {
 944                 cpu->cflags_next_tb = -1;
 945             }
 946
 947             if (check_for_breakpoints(cpu, pc, &cflags)) {
 948                 break;
 949             }
 950
 951             tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
 952             if (tb == NULL) {
 953                 uint32_t h;
 954
 955                 mmap_lock();
 956                 tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
 957                 mmap_unlock();
 958                 /*
 959                  * We add the TB in the virtual pc hash table
 960                  * for the fast lookup
 961                  */
 962                 h = tb_jmp_cache_hash_func(pc);
 963                 tb_jmp_cache_set(cpu->tb_jmp_cache, h, tb, pc);
 964             }
 965
 966 #ifndef CONFIG_USER_ONLY
 967             /*
 968              * We don't take care of direct jumps when address mapping
 969              * changes in system emulation.  So it's not safe to make a
 970              * direct jump to a TB spanning two pages because the mapping
 971              * for the second page can change.
 972              */
 973             if (tb_page_addr1(tb) != -1) {
 974                 last_tb = NULL;
 975             }
 976 #endif
 977             /* See if we can patch the calling TB. */
 978             if (last_tb) {
 979                 tb_add_jump(last_tb, tb_exit, tb);
 980             }
 981
 982             cpu_loop_exec_tb(cpu, tb, pc, &last_tb, &tb_exit);
 983
 984             QEMU_PLUGIN_ASSERT(cpu->plugin_mem_cbs == NULL);
 985             /* Try to align the host and virtual clocks
 986                if the guest is in advance */
 987             align_clocks(sc, cpu);
 988         }
 989     }
 990     return ret;
 991 }
 992
 993 static int cpu_exec_setjmp(CPUState *cpu, SyncClocks *sc)
 994 {
 995     /* Prepare setjmp context for exception handling. */
 996     if (unlikely(sigsetjmp(cpu->jmp_env, 0) != 0)) {
 997         /* Non-buggy compilers preserve this; assert the correct value. */
 998         g_assert(cpu == current_cpu);
 999
1000 #ifndef CONFIG_SOFTMMU
1001         clear_helper_retaddr();
1002         if (have_mmap_lock()) {
1003             mmap_unlock();
1004         }
1005 #endif
1006         if (qemu_mutex_iothread_locked()) {
1007             qemu_mutex_unlock_iothread();
1008         }
1009         qemu_plugin_disable_mem_helpers(cpu);
1010
1011         assert_no_pages_locked();
1012     }
1013
1014     return cpu_exec_loop(cpu, sc);
1015 }
1016
1017 int cpu_exec(CPUState *cpu)
1018 {
1019     int ret;
1020     SyncClocks sc = { 0 };
1021
1022     /* replay_interrupt may need current_cpu */
1023     current_cpu = cpu;
1024
1025     if (cpu_handle_halt(cpu)) {
1026         return EXCP_HALTED;
1027     }
1028
1029     rcu_read_lock();
1030     cpu_exec_enter(cpu);
1031
1032     /*
1033      * Calculate difference between guest clock and host clock.
1034      * This delay includes the delay of the last cycle, so
1035      * what we have to do is sleep until it is 0. As for the
1036      * advance/delay we gain here, we try to fix it next time.
1037      */
1038     init_delay_params(&sc, cpu);
1039
1040     ret = cpu_exec_setjmp(cpu, &sc);
1041
1042     cpu_exec_exit(cpu);
1043     rcu_read_unlock();
1044
1045     return ret;
1046 }
1047
1048 void tcg_exec_realizefn(CPUState *cpu, Error **errp)
1049 {
1050     static bool tcg_target_initialized;
1051     CPUClass *cc = CPU_GET_CLASS(cpu);
1052
1053     if (!tcg_target_initialized) {
1054         cc->tcg_ops->initialize();
1055         tcg_target_initialized = true;
1056     }
1057
1058     cpu->tb_jmp_cache = g_new0(CPUJumpCache, 1);
1059     tlb_init(cpu);
1060 #ifndef CONFIG_USER_ONLY
1061     tcg_iommu_init_notifier_list(cpu);
1062 #endif /* !CONFIG_USER_ONLY */
1063     /* qemu_plugin_vcpu_init_hook delayed until cpu_index assigned. */
1064 }
1065
1066 /* undo the initializations in reverse order */
1067 void tcg_exec_unrealizefn(CPUState *cpu)
1068 {
1069 #ifndef CONFIG_USER_ONLY
1070     tcg_iommu_free_notifier_list(cpu);
1071 #endif /* !CONFIG_USER_ONLY */
1072
1073     tlb_destroy(cpu);
1074     g_free_rcu(cpu->tb_jmp_cache, rcu);
1075 }
1076
1077 #ifndef CONFIG_USER_ONLY
1078
1079 static void dump_drift_info(GString *buf)
1080 {
1081     if (!icount_enabled()) {
1082         return;
1083     }
1084
1085     g_string_append_printf(buf, "Host - Guest clock  %"PRIi64" ms\n",
1086                            (cpu_get_clock() - icount_get()) / SCALE_MS);
1087     if (icount_align_option) {
1088         g_string_append_printf(buf, "Max guest delay     %"PRIi64" ms\n",
1089                                -max_delay / SCALE_MS);
1090         g_string_append_printf(buf, "Max guest advance   %"PRIi64" ms\n",
1091                                max_advance / SCALE_MS);
1092     } else {
1093         g_string_append_printf(buf, "Max guest delay     NA\n");
1094         g_string_append_printf(buf, "Max guest advance   NA\n");
1095     }
1096 }
1097
1098 HumanReadableText *qmp_x_query_jit(Error **errp)
1099 {
1100     g_autoptr(GString) buf = g_string_new("");
1101
1102     if (!tcg_enabled()) {
1103         error_setg(errp, "JIT information is only available with accel=tcg");
1104         return NULL;
1105     }
1106
1107     dump_exec_info(buf);
1108     dump_drift_info(buf);
1109
1110     return human_readable_text_from_str(buf);
1111 }
1112
1113 HumanReadableText *qmp_x_query_opcount(Error **errp)
1114 {
1115     g_autoptr(GString) buf = g_string_new("");
1116
1117     if (!tcg_enabled()) {
1118         error_setg(errp, "Opcode count information is only available with accel=tcg");
1119         return NULL;
1120     }
1121
1122     tcg_dump_op_count(buf);
1123
1124     return human_readable_text_from_str(buf);
1125 }
1126
1127 #ifdef CONFIG_PROFILER
1128
1129 int64_t dev_time;
1130
1131 HumanReadableText *qmp_x_query_profile(Error **errp)
1132 {
1133     g_autoptr(GString) buf = g_string_new("");
1134     static int64_t last_cpu_exec_time;
1135     int64_t cpu_exec_time;
1136     int64_t delta;
1137
1138     cpu_exec_time = tcg_cpu_exec_time();
1139     delta = cpu_exec_time - last_cpu_exec_time;
1140
1141     g_string_append_printf(buf, "async time  %" PRId64 " (%0.3f)\n",
1142                            dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
1143     g_string_append_printf(buf, "qemu time   %" PRId64 " (%0.3f)\n",
1144                            delta, delta / (double)NANOSECONDS_PER_SECOND);
1145     last_cpu_exec_time = cpu_exec_time;
1146     dev_time = 0;
1147
1148     return human_readable_text_from_str(buf);
1149 }
1150 #else
1151 HumanReadableText *qmp_x_query_profile(Error **errp)
1152 {
1153     error_setg(errp, "Internal profiler not compiled");
1154     return NULL;
1155 }
1156 #endif
1157
1158 #endif /* !CONFIG_USER_ONLY */