]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
cpus: hvf: unregister thread with RCU
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
7b31bbc2 26#include "qemu/osdep.h"
33c11879 27#include "qemu-common.h"
8d4e9146 28#include "qemu/config-file.h"
33c11879 29#include "cpu.h"
83c9089e 30#include "monitor/monitor.h"
a4e15de9 31#include "qapi/qmp/qerror.h"
d49b6836 32#include "qemu/error-report.h"
9c17d615 33#include "sysemu/sysemu.h"
da31d594 34#include "sysemu/block-backend.h"
022c62cb 35#include "exec/gdbstub.h"
9c17d615 36#include "sysemu/dma.h"
b3946626 37#include "sysemu/hw_accel.h"
9c17d615 38#include "sysemu/kvm.h"
b0cb0a66 39#include "sysemu/hax.h"
c97d6d2c 40#include "sysemu/hvf.h"
de0b36b6 41#include "qmp-commands.h"
63c91552 42#include "exec/exec-all.h"
296af7c9 43
1de7afc9 44#include "qemu/thread.h"
9c17d615
PB
45#include "sysemu/cpus.h"
46#include "sysemu/qtest.h"
1de7afc9
PB
47#include "qemu/main-loop.h"
48#include "qemu/bitmap.h"
cb365646 49#include "qemu/seqlock.h"
8d4e9146 50#include "tcg.h"
a4e15de9 51#include "qapi-event.h"
9cb805fd 52#include "hw/nmi.h"
8b427044 53#include "sysemu/replay.h"
afed5a5a 54#include "hw/boards.h"
0ff0fc19 55
6d9cb73c
JK
56#ifdef CONFIG_LINUX
57
58#include <sys/prctl.h>
59
c0532a76
MT
60#ifndef PR_MCE_KILL
61#define PR_MCE_KILL 33
62#endif
63
6d9cb73c
JK
64#ifndef PR_MCE_KILL_SET
65#define PR_MCE_KILL_SET 1
66#endif
67
68#ifndef PR_MCE_KILL_EARLY
69#define PR_MCE_KILL_EARLY 1
70#endif
71
72#endif /* CONFIG_LINUX */
73
27498bef
ST
74int64_t max_delay;
75int64_t max_advance;
296af7c9 76
2adcc85d
JH
77/* vcpu throttling controls */
78static QEMUTimer *throttle_timer;
79static unsigned int throttle_percentage;
80
81#define CPU_THROTTLE_PCT_MIN 1
82#define CPU_THROTTLE_PCT_MAX 99
83#define CPU_THROTTLE_TIMESLICE_NS 10000000
84
321bc0b2
TC
85bool cpu_is_stopped(CPUState *cpu)
86{
87 return cpu->stopped || !runstate_is_running();
88}
89
a98ae1d8 90static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 91{
c64ca814 92 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
93 return false;
94 }
321bc0b2 95 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
96 return true;
97 }
8c2e1b00 98 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 99 kvm_halt_in_kernel()) {
ac873f1e
PM
100 return false;
101 }
102 return true;
103}
104
105static bool all_cpu_threads_idle(void)
106{
182735ef 107 CPUState *cpu;
ac873f1e 108
bdc44640 109 CPU_FOREACH(cpu) {
182735ef 110 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
111 return false;
112 }
113 }
114 return true;
115}
116
946fb27c
PB
117/***********************************************************/
118/* guest cycle counter */
119
a3270e19
PB
120/* Protected by TimersState seqlock */
121
5045e9d9 122static bool icount_sleep = true;
946fb27c
PB
123/* Conversion factor from emulated instructions to virtual clock ticks. */
124static int icount_time_shift;
125/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
126#define MAX_ICOUNT_SHIFT 10
a3270e19 127
946fb27c 128typedef struct TimersState {
cb365646 129 /* Protected by BQL. */
946fb27c
PB
130 int64_t cpu_ticks_prev;
131 int64_t cpu_ticks_offset;
cb365646
LPF
132
133 /* cpu_clock_offset can be read out of BQL, so protect it with
134 * this lock.
135 */
136 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
137 int64_t cpu_clock_offset;
138 int32_t cpu_ticks_enabled;
139 int64_t dummy;
c96778bb
FK
140
141 /* Compensate for varying guest execution speed. */
142 int64_t qemu_icount_bias;
143 /* Only written by TCG thread */
144 int64_t qemu_icount;
b39e3f34
PD
145 /* for adjusting icount */
146 int64_t vm_clock_warp_start;
147 QEMUTimer *icount_rt_timer;
148 QEMUTimer *icount_vm_timer;
149 QEMUTimer *icount_warp_timer;
946fb27c
PB
150} TimersState;
151
d9cd4007 152static TimersState timers_state;
8d4e9146
FK
153bool mttcg_enabled;
154
155/*
156 * We default to false if we know other options have been enabled
157 * which are currently incompatible with MTTCG. Otherwise when each
158 * guest (target) has been updated to support:
159 * - atomic instructions
160 * - memory ordering primitives (barriers)
161 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
162 *
163 * Once a guest architecture has been converted to the new primitives
164 * there are two remaining limitations to check.
165 *
166 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
167 * - The host must have a stronger memory order than the guest
168 *
169 * It may be possible in future to support strong guests on weak hosts
170 * but that will require tagging all load/stores in a guest with their
171 * implicit memory order requirements which would likely slow things
172 * down a lot.
173 */
174
/*
 * Return true when every ordering bit in TCG_GUEST_DEFAULT_MO is also
 * set in TCG_TARGET_DEFAULT_MO.  If either macro is not defined the
 * answer is false.
 */
static bool check_tcg_memory_orders_compatible(void)
{
    bool compatible = false;

#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    compatible = (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#endif
    return compatible;
}
183
184static bool default_mttcg_enabled(void)
185{
83fd9629 186 if (use_icount || TCG_OVERSIZED_GUEST) {
8d4e9146
FK
187 return false;
188 } else {
189#ifdef TARGET_SUPPORTS_MTTCG
190 return check_tcg_memory_orders_compatible();
191#else
192 return false;
193#endif
194 }
195}
196
197void qemu_tcg_configure(QemuOpts *opts, Error **errp)
198{
199 const char *t = qemu_opt_get(opts, "thread");
200 if (t) {
201 if (strcmp(t, "multi") == 0) {
202 if (TCG_OVERSIZED_GUEST) {
203 error_setg(errp, "No MTTCG when guest word size > hosts");
83fd9629
AB
204 } else if (use_icount) {
205 error_setg(errp, "No MTTCG when icount is enabled");
8d4e9146 206 } else {
86953503 207#ifndef TARGET_SUPPORTS_MTTCG
c34c7620
AB
208 error_report("Guest not yet converted to MTTCG - "
209 "you may get unexpected results");
210#endif
8d4e9146
FK
211 if (!check_tcg_memory_orders_compatible()) {
212 error_report("Guest expects a stronger memory ordering "
213 "than the host provides");
8cfef892 214 error_printf("This may cause strange/hard to debug errors\n");
8d4e9146
FK
215 }
216 mttcg_enabled = true;
217 }
218 } else if (strcmp(t, "single") == 0) {
219 mttcg_enabled = false;
220 } else {
221 error_setg(errp, "Invalid 'thread' setting %s", t);
222 }
223 } else {
224 mttcg_enabled = default_mttcg_enabled();
225 }
226}
946fb27c 227
e4cd9657
AB
228/* The current number of executed instructions is based on what we
229 * originally budgeted minus the current state of the decrementing
230 * icount counters in extra/u16.low.
231 */
232static int64_t cpu_get_icount_executed(CPUState *cpu)
233{
234 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
235}
236
512d3c80
AB
237/*
238 * Update the global shared timer_state.qemu_icount to take into
239 * account executed instructions. This is done by the TCG vCPU
240 * thread so the main-loop can see time has moved forward.
241 */
242void cpu_update_icount(CPUState *cpu)
243{
244 int64_t executed = cpu_get_icount_executed(cpu);
245 cpu->icount_budget -= executed;
246
247#ifdef CONFIG_ATOMIC64
248 atomic_set__nocheck(&timers_state.qemu_icount,
249 atomic_read__nocheck(&timers_state.qemu_icount) +
250 executed);
251#else /* FIXME: we need 64bit atomics to do this safely */
252 timers_state.qemu_icount += executed;
253#endif
254}
255
2a62914b 256int64_t cpu_get_icount_raw(void)
946fb27c 257{
4917cf44 258 CPUState *cpu = current_cpu;
946fb27c 259
243c5f77 260 if (cpu && cpu->running) {
414b15c9 261 if (!cpu->can_do_io) {
2a62914b
PD
262 fprintf(stderr, "Bad icount read\n");
263 exit(1);
946fb27c 264 }
e4cd9657 265 /* Take into account what has run */
1d05906b 266 cpu_update_icount(cpu);
946fb27c 267 }
1d05906b
AB
268#ifdef CONFIG_ATOMIC64
269 return atomic_read__nocheck(&timers_state.qemu_icount);
270#else /* FIXME: we need 64bit atomics to do this safely */
271 return timers_state.qemu_icount;
272#endif
2a62914b
PD
273}
274
275/* Return the virtual CPU time, based on the instruction counter. */
276static int64_t cpu_get_icount_locked(void)
277{
278 int64_t icount = cpu_get_icount_raw();
3f031313 279 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
280}
281
17a15f1b
PB
282int64_t cpu_get_icount(void)
283{
284 int64_t icount;
285 unsigned start;
286
287 do {
288 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
289 icount = cpu_get_icount_locked();
290 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
291
292 return icount;
293}
294
3f031313
FK
295int64_t cpu_icount_to_ns(int64_t icount)
296{
297 return icount << icount_time_shift;
298}
299
d90f3cca
C
300/* return the time elapsed in VM between vm_start and vm_stop. Unless
301 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
302 * counter.
303 *
304 * Caller must hold the BQL
305 */
946fb27c
PB
306int64_t cpu_get_ticks(void)
307{
5f3e3101
PB
308 int64_t ticks;
309
946fb27c
PB
310 if (use_icount) {
311 return cpu_get_icount();
312 }
5f3e3101
PB
313
314 ticks = timers_state.cpu_ticks_offset;
315 if (timers_state.cpu_ticks_enabled) {
4a7428c5 316 ticks += cpu_get_host_ticks();
5f3e3101
PB
317 }
318
319 if (timers_state.cpu_ticks_prev > ticks) {
320 /* Note: non increasing ticks may happen if the host uses
321 software suspend */
322 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
323 ticks = timers_state.cpu_ticks_prev;
946fb27c 324 }
5f3e3101
PB
325
326 timers_state.cpu_ticks_prev = ticks;
327 return ticks;
946fb27c
PB
328}
329
cb365646 330static int64_t cpu_get_clock_locked(void)
946fb27c 331{
1d45cea5 332 int64_t time;
cb365646 333
1d45cea5 334 time = timers_state.cpu_clock_offset;
5f3e3101 335 if (timers_state.cpu_ticks_enabled) {
1d45cea5 336 time += get_clock();
946fb27c 337 }
cb365646 338
1d45cea5 339 return time;
cb365646
LPF
340}
341
d90f3cca 342/* Return the monotonic time elapsed in VM, i.e.,
8212ff86
PM
343 * the time between vm_start and vm_stop
344 */
cb365646
LPF
345int64_t cpu_get_clock(void)
346{
347 int64_t ti;
348 unsigned start;
349
350 do {
351 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
352 ti = cpu_get_clock_locked();
353 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
354
355 return ti;
946fb27c
PB
356}
357
cb365646 358/* enable cpu_get_ticks()
3224e878 359 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 360 */
946fb27c
PB
361void cpu_enable_ticks(void)
362{
cb365646 363 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 364 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 365 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 366 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
367 timers_state.cpu_clock_offset -= get_clock();
368 timers_state.cpu_ticks_enabled = 1;
369 }
03719e44 370 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
371}
372
373/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646 374 * cpu_get_ticks() after that.
3224e878 375 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 376 */
946fb27c
PB
377void cpu_disable_ticks(void)
378{
cb365646 379 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 380 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 381 if (timers_state.cpu_ticks_enabled) {
4a7428c5 382 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 383 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
384 timers_state.cpu_ticks_enabled = 0;
385 }
03719e44 386 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
387}
388
389/* Correlation between real and virtual time is always going to be
390 fairly approximate, so ignore small variation.
391 When the guest is idle real and virtual time will be aligned in
392 the IO wait loop. */
73bcb24d 393#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
394
395static void icount_adjust(void)
396{
397 int64_t cur_time;
398 int64_t cur_icount;
399 int64_t delta;
a3270e19
PB
400
401 /* Protected by TimersState mutex. */
946fb27c 402 static int64_t last_delta;
468cc7cf 403
946fb27c
PB
404 /* If the VM is not running, then do nothing. */
405 if (!runstate_is_running()) {
406 return;
407 }
468cc7cf 408
03719e44 409 seqlock_write_begin(&timers_state.vm_clock_seqlock);
17a15f1b
PB
410 cur_time = cpu_get_clock_locked();
411 cur_icount = cpu_get_icount_locked();
468cc7cf 412
946fb27c
PB
413 delta = cur_icount - cur_time;
414 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
415 if (delta > 0
416 && last_delta + ICOUNT_WOBBLE < delta * 2
417 && icount_time_shift > 0) {
418 /* The guest is getting too far ahead. Slow time down. */
419 icount_time_shift--;
420 }
421 if (delta < 0
422 && last_delta - ICOUNT_WOBBLE > delta * 2
423 && icount_time_shift < MAX_ICOUNT_SHIFT) {
424 /* The guest is getting too far behind. Speed time up. */
425 icount_time_shift++;
426 }
427 last_delta = delta;
c96778bb
FK
428 timers_state.qemu_icount_bias = cur_icount
429 - (timers_state.qemu_icount << icount_time_shift);
03719e44 430 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
431}
432
433static void icount_adjust_rt(void *opaque)
434{
b39e3f34 435 timer_mod(timers_state.icount_rt_timer,
1979b908 436 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
437 icount_adjust();
438}
439
440static void icount_adjust_vm(void *opaque)
441{
b39e3f34 442 timer_mod(timers_state.icount_vm_timer,
40daca54 443 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 444 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
445 icount_adjust();
446}
447
448static int64_t qemu_icount_round(int64_t count)
449{
450 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
451}
452
efab87cf 453static void icount_warp_rt(void)
946fb27c 454{
ccffff48
AB
455 unsigned seq;
456 int64_t warp_start;
457
17a15f1b
PB
458 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
459 * changes from -1 to another value, so the race here is okay.
460 */
ccffff48
AB
461 do {
462 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
b39e3f34 463 warp_start = timers_state.vm_clock_warp_start;
ccffff48
AB
464 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
465
466 if (warp_start == -1) {
946fb27c
PB
467 return;
468 }
469
03719e44 470 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 471 if (runstate_is_running()) {
8eda206e
PD
472 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
473 cpu_get_clock_locked());
8ed961d9
PB
474 int64_t warp_delta;
475
b39e3f34 476 warp_delta = clock - timers_state.vm_clock_warp_start;
8ed961d9 477 if (use_icount == 2) {
946fb27c 478 /*
40daca54 479 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
480 * far ahead of real time.
481 */
17a15f1b 482 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 483 int64_t delta = clock - cur_icount;
8ed961d9 484 warp_delta = MIN(warp_delta, delta);
946fb27c 485 }
c96778bb 486 timers_state.qemu_icount_bias += warp_delta;
946fb27c 487 }
b39e3f34 488 timers_state.vm_clock_warp_start = -1;
03719e44 489 seqlock_write_end(&timers_state.vm_clock_seqlock);
8ed961d9
PB
490
491 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
492 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
493 }
946fb27c
PB
494}
495
/*
 * Callback of the icount warp timer; @opaque is unused.
 *
 * No checkpoint is needed here because the timer already synchronizes
 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
 */
static void icount_timer_cb(void *opaque)
{
    icount_warp_rt();
}
503
8156be56
PB
504void qtest_clock_warp(int64_t dest)
505{
40daca54 506 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 507 AioContext *aio_context;
8156be56 508 assert(qtest_enabled());
efef88b3 509 aio_context = qemu_get_aio_context();
8156be56 510 while (clock < dest) {
40daca54 511 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 512 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 513
03719e44 514 seqlock_write_begin(&timers_state.vm_clock_seqlock);
c96778bb 515 timers_state.qemu_icount_bias += warp;
03719e44 516 seqlock_write_end(&timers_state.vm_clock_seqlock);
17a15f1b 517
40daca54 518 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 519 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 520 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 521 }
40daca54 522 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
523}
524
e76d1798 525void qemu_start_warp_timer(void)
946fb27c 526{
ce78d18c 527 int64_t clock;
946fb27c
PB
528 int64_t deadline;
529
e76d1798 530 if (!use_icount) {
946fb27c
PB
531 return;
532 }
533
8bd7f71d
PD
534 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
535 * do not fire, so computing the deadline does not make sense.
536 */
537 if (!runstate_is_running()) {
538 return;
539 }
540
541 /* warp clock deterministically in record/replay mode */
e76d1798 542 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
8bd7f71d
PD
543 return;
544 }
545
ce78d18c 546 if (!all_cpu_threads_idle()) {
946fb27c
PB
547 return;
548 }
549
8156be56
PB
550 if (qtest_enabled()) {
551 /* When testing, qtest commands advance icount. */
e76d1798 552 return;
8156be56
PB
553 }
554
ac70aafc 555 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 556 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 557 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 558 if (deadline < 0) {
d7a0f71d
VC
559 static bool notified;
560 if (!icount_sleep && !notified) {
3dc6f869 561 warn_report("icount sleep disabled and no active timers");
d7a0f71d
VC
562 notified = true;
563 }
ce78d18c 564 return;
ac70aafc
AB
565 }
566
946fb27c
PB
567 if (deadline > 0) {
568 /*
40daca54 569 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
570 * sleep. Otherwise, the CPU might be waiting for a future timer
571 * interrupt to wake it up, but the interrupt never comes because
572 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 573 * QEMU_CLOCK_VIRTUAL.
946fb27c 574 */
5045e9d9
VC
575 if (!icount_sleep) {
576 /*
577 * We never let VCPUs sleep in no sleep icount mode.
578 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
579 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
580 * It is useful when we want a deterministic execution time,
581 * isolated from host latencies.
582 */
03719e44 583 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9 584 timers_state.qemu_icount_bias += deadline;
03719e44 585 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9
VC
586 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
587 } else {
588 /*
589 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
590 * "real" time, (related to the time left until the next event) has
591 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
592 * This avoids that the warps are visible externally; for example,
593 * you will not be sending network packets continuously instead of
594 * every 100ms.
595 */
03719e44 596 seqlock_write_begin(&timers_state.vm_clock_seqlock);
b39e3f34
PD
597 if (timers_state.vm_clock_warp_start == -1
598 || timers_state.vm_clock_warp_start > clock) {
599 timers_state.vm_clock_warp_start = clock;
5045e9d9 600 }
03719e44 601 seqlock_write_end(&timers_state.vm_clock_seqlock);
b39e3f34
PD
602 timer_mod_anticipate(timers_state.icount_warp_timer,
603 clock + deadline);
ce78d18c 604 }
ac70aafc 605 } else if (deadline == 0) {
40daca54 606 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
607 }
608}
609
e76d1798
PD
610static void qemu_account_warp_timer(void)
611{
612 if (!use_icount || !icount_sleep) {
613 return;
614 }
615
616 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
617 * do not fire, so computing the deadline does not make sense.
618 */
619 if (!runstate_is_running()) {
620 return;
621 }
622
623 /* warp clock deterministically in record/replay mode */
624 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
625 return;
626 }
627
b39e3f34 628 timer_del(timers_state.icount_warp_timer);
e76d1798
PD
629 icount_warp_rt();
630}
631
d09eae37
FK
632static bool icount_state_needed(void *opaque)
633{
634 return use_icount;
635}
636
b39e3f34
PD
637static bool warp_timer_state_needed(void *opaque)
638{
639 TimersState *s = opaque;
640 return s->icount_warp_timer != NULL;
641}
642
643static bool adjust_timers_state_needed(void *opaque)
644{
645 TimersState *s = opaque;
646 return s->icount_rt_timer != NULL;
647}
648
649/*
650 * Subsection for warp timer migration is optional, because may not be created
651 */
652static const VMStateDescription icount_vmstate_warp_timer = {
653 .name = "timer/icount/warp_timer",
654 .version_id = 1,
655 .minimum_version_id = 1,
656 .needed = warp_timer_state_needed,
657 .fields = (VMStateField[]) {
658 VMSTATE_INT64(vm_clock_warp_start, TimersState),
659 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
660 VMSTATE_END_OF_LIST()
661 }
662};
663
664static const VMStateDescription icount_vmstate_adjust_timers = {
665 .name = "timer/icount/timers",
666 .version_id = 1,
667 .minimum_version_id = 1,
668 .needed = adjust_timers_state_needed,
669 .fields = (VMStateField[]) {
670 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
671 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
672 VMSTATE_END_OF_LIST()
673 }
674};
675
d09eae37
FK
676/*
677 * This is a subsection for icount migration.
678 */
679static const VMStateDescription icount_vmstate_timers = {
680 .name = "timer/icount",
681 .version_id = 1,
682 .minimum_version_id = 1,
5cd8cada 683 .needed = icount_state_needed,
d09eae37
FK
684 .fields = (VMStateField[]) {
685 VMSTATE_INT64(qemu_icount_bias, TimersState),
686 VMSTATE_INT64(qemu_icount, TimersState),
687 VMSTATE_END_OF_LIST()
b39e3f34
PD
688 },
689 .subsections = (const VMStateDescription*[]) {
690 &icount_vmstate_warp_timer,
691 &icount_vmstate_adjust_timers,
692 NULL
d09eae37
FK
693 }
694};
695
946fb27c
PB
696static const VMStateDescription vmstate_timers = {
697 .name = "timer",
698 .version_id = 2,
699 .minimum_version_id = 1,
35d08458 700 .fields = (VMStateField[]) {
946fb27c
PB
701 VMSTATE_INT64(cpu_ticks_offset, TimersState),
702 VMSTATE_INT64(dummy, TimersState),
703 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
704 VMSTATE_END_OF_LIST()
d09eae37 705 },
5cd8cada
JQ
706 .subsections = (const VMStateDescription*[]) {
707 &icount_vmstate_timers,
708 NULL
946fb27c
PB
709 }
710};
711
14e6fe12 712static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
2adcc85d 713{
2adcc85d
JH
714 double pct;
715 double throttle_ratio;
716 long sleeptime_ns;
717
718 if (!cpu_throttle_get_percentage()) {
719 return;
720 }
721
722 pct = (double)cpu_throttle_get_percentage()/100;
723 throttle_ratio = pct / (1 - pct);
724 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
725
726 qemu_mutex_unlock_iothread();
2adcc85d
JH
727 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
728 qemu_mutex_lock_iothread();
90bb0c04 729 atomic_set(&cpu->throttle_thread_scheduled, 0);
2adcc85d
JH
730}
731
732static void cpu_throttle_timer_tick(void *opaque)
733{
734 CPUState *cpu;
735 double pct;
736
737 /* Stop the timer if needed */
738 if (!cpu_throttle_get_percentage()) {
739 return;
740 }
741 CPU_FOREACH(cpu) {
742 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
14e6fe12
PB
743 async_run_on_cpu(cpu, cpu_throttle_thread,
744 RUN_ON_CPU_NULL);
2adcc85d
JH
745 }
746 }
747
748 pct = (double)cpu_throttle_get_percentage()/100;
749 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
750 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
751}
752
753void cpu_throttle_set(int new_throttle_pct)
754{
755 /* Ensure throttle percentage is within valid range */
756 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
757 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
758
759 atomic_set(&throttle_percentage, new_throttle_pct);
760
761 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
762 CPU_THROTTLE_TIMESLICE_NS);
763}
764
765void cpu_throttle_stop(void)
766{
767 atomic_set(&throttle_percentage, 0);
768}
769
/* Return true while a non-zero throttle percentage is configured. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
774
775int cpu_throttle_get_percentage(void)
776{
777 return atomic_read(&throttle_percentage);
778}
779
4603ea01
PD
780void cpu_ticks_init(void)
781{
ccdb3c1f 782 seqlock_init(&timers_state.vm_clock_seqlock);
4603ea01 783 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
784 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
785 cpu_throttle_timer_tick, NULL);
4603ea01
PD
786}
787
1ad9580b 788void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 789{
1ad9580b 790 const char *option;
a8bfac37 791 char *rem_str = NULL;
1ad9580b 792
1ad9580b 793 option = qemu_opt_get(opts, "shift");
946fb27c 794 if (!option) {
a8bfac37
ST
795 if (qemu_opt_get(opts, "align") != NULL) {
796 error_setg(errp, "Please specify shift option when using align");
797 }
946fb27c
PB
798 return;
799 }
f1f4b57e
VC
800
801 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9 802 if (icount_sleep) {
b39e3f34 803 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
e76d1798 804 icount_timer_cb, NULL);
5045e9d9 805 }
f1f4b57e 806
a8bfac37 807 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
808
809 if (icount_align_option && !icount_sleep) {
778d9f9b 810 error_setg(errp, "align=on and sleep=off are incompatible");
f1f4b57e 811 }
946fb27c 812 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
813 errno = 0;
814 icount_time_shift = strtol(option, &rem_str, 0);
815 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
816 error_setg(errp, "icount: Invalid shift value");
817 }
946fb27c
PB
818 use_icount = 1;
819 return;
a8bfac37
ST
820 } else if (icount_align_option) {
821 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e 822 } else if (!icount_sleep) {
778d9f9b 823 error_setg(errp, "shift=auto and sleep=off are incompatible");
946fb27c
PB
824 }
825
826 use_icount = 2;
827
828 /* 125MIPS seems a reasonable initial guess at the guest speed.
829 It will be corrected fairly quickly anyway. */
830 icount_time_shift = 3;
831
832 /* Have both realtime and virtual time triggers for speed adjustment.
833 The realtime trigger catches emulated time passing too slowly,
834 the virtual time trigger catches emulated time passing too fast.
835 Realtime triggers occur even when idle, so use them less frequently
836 than VM triggers. */
b39e3f34
PD
837 timers_state.vm_clock_warp_start = -1;
838 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
bf2a7ddb 839 icount_adjust_rt, NULL);
b39e3f34 840 timer_mod(timers_state.icount_rt_timer,
bf2a7ddb 841 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
b39e3f34 842 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
40daca54 843 icount_adjust_vm, NULL);
b39e3f34 844 timer_mod(timers_state.icount_vm_timer,
40daca54 845 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 846 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
847}
848
6546706d
AB
849/***********************************************************/
850/* TCG vCPU kick timer
851 *
852 * The kick timer is responsible for moving single threaded vCPU
853 * emulation on to the next vCPU. If more than one vCPU is running a
854 * timer event with force a cpu->exit so the next vCPU can get
855 * scheduled.
856 *
857 * The timer is removed if all vCPUs are idle and restarted again once
858 * idleness is complete.
859 */
860
861static QEMUTimer *tcg_kick_vcpu_timer;
791158d9 862static CPUState *tcg_current_rr_cpu;
6546706d
AB
863
864#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
865
866static inline int64_t qemu_tcg_next_kick(void)
867{
868 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
869}
870
791158d9
AB
871/* Kick the currently round-robin scheduled vCPU */
872static void qemu_cpu_kick_rr_cpu(void)
873{
874 CPUState *cpu;
791158d9
AB
875 do {
876 cpu = atomic_mb_read(&tcg_current_rr_cpu);
877 if (cpu) {
878 cpu_exit(cpu);
879 }
880 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
881}
882
6b8f0187
PB
883static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
884{
885}
886
3f53bc61
PB
887void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
888{
6b8f0187
PB
889 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
890 qemu_notify_event();
891 return;
892 }
893
894 if (!qemu_in_vcpu_thread() && first_cpu) {
895 /* qemu_cpu_kick is not enough to kick a halted CPU out of
896 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
897 * causes cpu_thread_is_idle to return false. This way,
898 * handle_icount_deadline can run.
899 */
900 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
901 }
3f53bc61
PB
902}
903
6546706d
AB
904static void kick_tcg_thread(void *opaque)
905{
906 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
791158d9 907 qemu_cpu_kick_rr_cpu();
6546706d
AB
908}
909
910static void start_tcg_kick_timer(void)
911{
db08b687
PB
912 assert(!mttcg_enabled);
913 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
6546706d
AB
914 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
915 kick_tcg_thread, NULL);
916 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
917 }
918}
919
920static void stop_tcg_kick_timer(void)
921{
db08b687 922 assert(!mttcg_enabled);
6546706d
AB
923 if (tcg_kick_vcpu_timer) {
924 timer_del(tcg_kick_vcpu_timer);
925 tcg_kick_vcpu_timer = NULL;
926 }
927}
928
296af7c9
BS
929/***********************************************************/
930void hw_error(const char *fmt, ...)
931{
932 va_list ap;
55e5c285 933 CPUState *cpu;
296af7c9
BS
934
935 va_start(ap, fmt);
936 fprintf(stderr, "qemu: hardware error: ");
937 vfprintf(stderr, fmt, ap);
938 fprintf(stderr, "\n");
bdc44640 939 CPU_FOREACH(cpu) {
55e5c285 940 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 941 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
942 }
943 va_end(ap);
944 abort();
945}
946
947void cpu_synchronize_all_states(void)
948{
182735ef 949 CPUState *cpu;
296af7c9 950
bdc44640 951 CPU_FOREACH(cpu) {
182735ef 952 cpu_synchronize_state(cpu);
c97d6d2c
SAGDR
953 /* TODO: move to cpu_synchronize_state() */
954 if (hvf_enabled()) {
955 hvf_cpu_synchronize_state(cpu);
956 }
296af7c9
BS
957 }
958}
959
960void cpu_synchronize_all_post_reset(void)
961{
182735ef 962 CPUState *cpu;
296af7c9 963
bdc44640 964 CPU_FOREACH(cpu) {
182735ef 965 cpu_synchronize_post_reset(cpu);
c97d6d2c
SAGDR
966 /* TODO: move to cpu_synchronize_post_reset() */
967 if (hvf_enabled()) {
968 hvf_cpu_synchronize_post_reset(cpu);
969 }
296af7c9
BS
970 }
971}
972
973void cpu_synchronize_all_post_init(void)
974{
182735ef 975 CPUState *cpu;
296af7c9 976
bdc44640 977 CPU_FOREACH(cpu) {
182735ef 978 cpu_synchronize_post_init(cpu);
c97d6d2c
SAGDR
979 /* TODO: move to cpu_synchronize_post_init() */
980 if (hvf_enabled()) {
981 hvf_cpu_synchronize_post_init(cpu);
982 }
296af7c9
BS
983 }
984}
985
75e972da
DG
986void cpu_synchronize_all_pre_loadvm(void)
987{
988 CPUState *cpu;
989
990 CPU_FOREACH(cpu) {
991 cpu_synchronize_pre_loadvm(cpu);
992 }
993}
994
56983463 995static int do_vm_stop(RunState state)
296af7c9 996{
56983463
KW
997 int ret = 0;
998
1354869c 999 if (runstate_is_running()) {
296af7c9 1000 cpu_disable_ticks();
296af7c9 1001 pause_all_vcpus();
f5bbfba1 1002 runstate_set(state);
1dfb4dd9 1003 vm_state_notify(0, state);
a4e15de9 1004 qapi_event_send_stop(&error_abort);
296af7c9 1005 }
56983463 1006
594a45ce 1007 bdrv_drain_all();
6d0ceb80 1008 replay_disable_events();
22af08ea 1009 ret = bdrv_flush_all();
594a45ce 1010
56983463 1011 return ret;
296af7c9
BS
1012}
1013
a1fcaa73 1014static bool cpu_can_run(CPUState *cpu)
296af7c9 1015{
4fdeee7c 1016 if (cpu->stop) {
a1fcaa73 1017 return false;
0ab07c62 1018 }
321bc0b2 1019 if (cpu_is_stopped(cpu)) {
a1fcaa73 1020 return false;
0ab07c62 1021 }
a1fcaa73 1022 return true;
296af7c9
BS
1023}
1024
91325046 1025static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 1026{
64f6b346 1027 gdb_set_stop_cpu(cpu);
8cf71710 1028 qemu_system_debug_request();
f324e766 1029 cpu->stopped = true;
3c638d06
JK
1030}
1031
6d9cb73c
JK
1032#ifdef CONFIG_LINUX
1033static void sigbus_reraise(void)
1034{
1035 sigset_t set;
1036 struct sigaction action;
1037
1038 memset(&action, 0, sizeof(action));
1039 action.sa_handler = SIG_DFL;
1040 if (!sigaction(SIGBUS, &action, NULL)) {
1041 raise(SIGBUS);
1042 sigemptyset(&set);
1043 sigaddset(&set, SIGBUS);
a2d1761d 1044 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
6d9cb73c
JK
1045 }
1046 perror("Failed to re-raise SIGBUS!\n");
1047 abort();
1048}
1049
d98d4072 1050static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
6d9cb73c 1051{
a16fc07e
PB
1052 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1053 sigbus_reraise();
1054 }
1055
2ae41db2
PB
1056 if (current_cpu) {
1057 /* Called asynchronously in VCPU thread. */
1058 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1059 sigbus_reraise();
1060 }
1061 } else {
1062 /* Called synchronously (via signalfd) in main thread. */
1063 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1064 sigbus_reraise();
1065 }
6d9cb73c
JK
1066 }
1067}
1068
1069static void qemu_init_sigbus(void)
1070{
1071 struct sigaction action;
1072
1073 memset(&action, 0, sizeof(action));
1074 action.sa_flags = SA_SIGINFO;
d98d4072 1075 action.sa_sigaction = sigbus_handler;
6d9cb73c
JK
1076 sigaction(SIGBUS, &action, NULL);
1077
1078 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1079}
6d9cb73c 1080#else /* !CONFIG_LINUX */
6d9cb73c
JK
/* No SIGBUS handling is set up when CONFIG_LINUX is not defined. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
a16fc07e 1084#endif /* !CONFIG_LINUX */
ff48eb5f 1085
b2532d88 1086static QemuMutex qemu_global_mutex;
296af7c9
BS
1087
1088static QemuThread io_thread;
1089
296af7c9
BS
1090/* cpu creation */
1091static QemuCond qemu_cpu_cond;
1092/* system init */
296af7c9
BS
1093static QemuCond qemu_pause_cond;
1094
d3b12f5d 1095void qemu_init_cpu_loop(void)
296af7c9 1096{
6d9cb73c 1097 qemu_init_sigbus();
ed94592b 1098 qemu_cond_init(&qemu_cpu_cond);
ed94592b 1099 qemu_cond_init(&qemu_pause_cond);
296af7c9 1100 qemu_mutex_init(&qemu_global_mutex);
296af7c9 1101
b7680cb6 1102 qemu_thread_get_self(&io_thread);
296af7c9
BS
1103}
1104
14e6fe12 1105void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
e82bcec2 1106{
d148d90e 1107 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
3c02270d
CV
1108}
1109
4c055ab5
GZ
1110static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1111{
1112 if (kvm_destroy_vcpu(cpu) < 0) {
1113 error_report("kvm_destroy_vcpu failed");
1114 exit(EXIT_FAILURE);
1115 }
1116}
1117
1118static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1119{
1120}
1121
ebd05fea
DH
1122static void qemu_cpu_stop(CPUState *cpu, bool exit)
1123{
1124 g_assert(qemu_cpu_is_self(cpu));
1125 cpu->stop = false;
1126 cpu->stopped = true;
1127 if (exit) {
1128 cpu_exit(cpu);
1129 }
1130 qemu_cond_broadcast(&qemu_pause_cond);
1131}
1132
509a0d78 1133static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 1134{
37257942 1135 atomic_mb_set(&cpu->thread_kicked, false);
4fdeee7c 1136 if (cpu->stop) {
ebd05fea 1137 qemu_cpu_stop(cpu, false);
296af7c9 1138 }
a5403c69 1139 process_queued_cpu_work(cpu);
37257942
AB
1140}
1141
db08b687 1142static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
37257942 1143{
db08b687 1144 while (all_cpu_threads_idle()) {
6546706d 1145 stop_tcg_kick_timer();
d5f8d613 1146 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1147 }
296af7c9 1148
6546706d
AB
1149 start_tcg_kick_timer();
1150
37257942 1151 qemu_wait_io_event_common(cpu);
296af7c9
BS
1152}
1153
db08b687 1154static void qemu_wait_io_event(CPUState *cpu)
296af7c9 1155{
a98ae1d8 1156 while (cpu_thread_is_idle(cpu)) {
f5c121b8 1157 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1158 }
296af7c9 1159
db08b687
PB
1160#ifdef _WIN32
1161 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1162 if (!tcg_enabled()) {
1163 SleepEx(0, TRUE);
c97d6d2c 1164 }
db08b687 1165#endif
c97d6d2c
SAGDR
1166 qemu_wait_io_event_common(cpu);
1167}
1168
7e97cd88 1169static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 1170{
48a106bd 1171 CPUState *cpu = arg;
84b4915d 1172 int r;
296af7c9 1173
ab28bd23
PB
1174 rcu_register_thread();
1175
2e7f7a3c 1176 qemu_mutex_lock_iothread();
814e612e 1177 qemu_thread_get_self(cpu->thread);
9f09e18a 1178 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1179 cpu->can_do_io = 1;
4917cf44 1180 current_cpu = cpu;
296af7c9 1181
504134d2 1182 r = kvm_init_vcpu(cpu);
84b4915d
JK
1183 if (r < 0) {
1184 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1185 exit(1);
1186 }
296af7c9 1187
18268b60 1188 kvm_init_cpu_signals(cpu);
296af7c9
BS
1189
1190 /* signal CPU creation */
61a46217 1191 cpu->created = true;
296af7c9
BS
1192 qemu_cond_signal(&qemu_cpu_cond);
1193
4c055ab5 1194 do {
a1fcaa73 1195 if (cpu_can_run(cpu)) {
1458c363 1196 r = kvm_cpu_exec(cpu);
83f338f7 1197 if (r == EXCP_DEBUG) {
91325046 1198 cpu_handle_guest_debug(cpu);
83f338f7 1199 }
0ab07c62 1200 }
db08b687 1201 qemu_wait_io_event(cpu);
4c055ab5 1202 } while (!cpu->unplug || cpu_can_run(cpu));
296af7c9 1203
4c055ab5 1204 qemu_kvm_destroy_vcpu(cpu);
2c579042
BR
1205 cpu->created = false;
1206 qemu_cond_signal(&qemu_cpu_cond);
4c055ab5 1207 qemu_mutex_unlock_iothread();
57615ed5 1208 rcu_unregister_thread();
296af7c9
BS
1209 return NULL;
1210}
1211
c7f0f3b1
AL
1212static void *qemu_dummy_cpu_thread_fn(void *arg)
1213{
1214#ifdef _WIN32
1215 fprintf(stderr, "qtest is not supported under Windows\n");
1216 exit(1);
1217#else
10a9021d 1218 CPUState *cpu = arg;
c7f0f3b1
AL
1219 sigset_t waitset;
1220 int r;
1221
ab28bd23
PB
1222 rcu_register_thread();
1223
c7f0f3b1 1224 qemu_mutex_lock_iothread();
814e612e 1225 qemu_thread_get_self(cpu->thread);
9f09e18a 1226 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1227 cpu->can_do_io = 1;
37257942 1228 current_cpu = cpu;
c7f0f3b1
AL
1229
1230 sigemptyset(&waitset);
1231 sigaddset(&waitset, SIG_IPI);
1232
1233 /* signal CPU creation */
61a46217 1234 cpu->created = true;
c7f0f3b1
AL
1235 qemu_cond_signal(&qemu_cpu_cond);
1236
d2831ab0 1237 do {
c7f0f3b1
AL
1238 qemu_mutex_unlock_iothread();
1239 do {
1240 int sig;
1241 r = sigwait(&waitset, &sig);
1242 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1243 if (r == -1) {
1244 perror("sigwait");
1245 exit(1);
1246 }
1247 qemu_mutex_lock_iothread();
db08b687 1248 qemu_wait_io_event(cpu);
d2831ab0 1249 } while (!cpu->unplug);
c7f0f3b1 1250
d2831ab0 1251 rcu_unregister_thread();
c7f0f3b1
AL
1252 return NULL;
1253#endif
1254}
1255
1be7fcb8
AB
1256static int64_t tcg_get_icount_limit(void)
1257{
1258 int64_t deadline;
1259
1260 if (replay_mode != REPLAY_MODE_PLAY) {
1261 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1262
1263 /* Maintain prior (possibly buggy) behaviour where if no deadline
1264 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1265 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1266 * nanoseconds.
1267 */
1268 if ((deadline < 0) || (deadline > INT32_MAX)) {
1269 deadline = INT32_MAX;
1270 }
1271
1272 return qemu_icount_round(deadline);
1273 } else {
1274 return replay_get_instructions();
1275 }
1276}
1277
12e9700d
AB
1278static void handle_icount_deadline(void)
1279{
6b8f0187 1280 assert(qemu_in_vcpu_thread());
12e9700d
AB
1281 if (use_icount) {
1282 int64_t deadline =
1283 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1284
1285 if (deadline == 0) {
6b8f0187 1286 /* Wake up other AioContexts. */
12e9700d 1287 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
6b8f0187 1288 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
12e9700d
AB
1289 }
1290 }
1291}
1292
05248382 1293static void prepare_icount_for_run(CPUState *cpu)
1be7fcb8 1294{
1be7fcb8 1295 if (use_icount) {
eda5f7c6 1296 int insns_left;
05248382
AB
1297
1298 /* These should always be cleared by process_icount_data after
1299 * each vCPU execution. However u16.high can be raised
1300 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1301 */
1302 g_assert(cpu->icount_decr.u16.low == 0);
1303 g_assert(cpu->icount_extra == 0);
1304
eda5f7c6
AB
1305 cpu->icount_budget = tcg_get_icount_limit();
1306 insns_left = MIN(0xffff, cpu->icount_budget);
1307 cpu->icount_decr.u16.low = insns_left;
1308 cpu->icount_extra = cpu->icount_budget - insns_left;
1be7fcb8 1309 }
05248382
AB
1310}
1311
1312static void process_icount_data(CPUState *cpu)
1313{
1be7fcb8 1314 if (use_icount) {
e4cd9657 1315 /* Account for executed instructions */
512d3c80 1316 cpu_update_icount(cpu);
05248382
AB
1317
1318 /* Reset the counters */
1319 cpu->icount_decr.u16.low = 0;
1be7fcb8 1320 cpu->icount_extra = 0;
e4cd9657
AB
1321 cpu->icount_budget = 0;
1322
1be7fcb8
AB
1323 replay_account_executed_instructions();
1324 }
05248382
AB
1325}
1326
1327
1328static int tcg_cpu_exec(CPUState *cpu)
1329{
1330 int ret;
1331#ifdef CONFIG_PROFILER
1332 int64_t ti;
1333#endif
1334
1335#ifdef CONFIG_PROFILER
1336 ti = profile_getclock();
1337#endif
1338 qemu_mutex_unlock_iothread();
1339 cpu_exec_start(cpu);
1340 ret = cpu_exec(cpu);
1341 cpu_exec_end(cpu);
1342 qemu_mutex_lock_iothread();
1343#ifdef CONFIG_PROFILER
1344 tcg_time += profile_getclock() - ti;
1345#endif
1be7fcb8
AB
1346 return ret;
1347}
1348
c93bbbef
AB
1349/* Destroy any remaining vCPUs which have been unplugged and have
1350 * finished running
1351 */
1352static void deal_with_unplugged_cpus(void)
1be7fcb8 1353{
c93bbbef 1354 CPUState *cpu;
1be7fcb8 1355
c93bbbef
AB
1356 CPU_FOREACH(cpu) {
1357 if (cpu->unplug && !cpu_can_run(cpu)) {
1358 qemu_tcg_destroy_vcpu(cpu);
1359 cpu->created = false;
1360 qemu_cond_signal(&qemu_cpu_cond);
1be7fcb8
AB
1361 break;
1362 }
1363 }
1be7fcb8 1364}
bdb7ca67 1365
6546706d
AB
1366/* Single-threaded TCG
1367 *
1368 * In the single-threaded case each vCPU is simulated in turn. If
1369 * there is more than a single vCPU we create a simple timer to kick
1370 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1371 * This is done explicitly rather than relying on side-effects
1372 * elsewhere.
1373 */
1374
37257942 1375static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
296af7c9 1376{
c3586ba7 1377 CPUState *cpu = arg;
296af7c9 1378
ab28bd23 1379 rcu_register_thread();
3468b59e 1380 tcg_register_thread();
ab28bd23 1381
2e7f7a3c 1382 qemu_mutex_lock_iothread();
814e612e 1383 qemu_thread_get_self(cpu->thread);
296af7c9 1384
38fcbd3f
AF
1385 CPU_FOREACH(cpu) {
1386 cpu->thread_id = qemu_get_thread_id();
1387 cpu->created = true;
626cf8f4 1388 cpu->can_do_io = 1;
38fcbd3f 1389 }
296af7c9
BS
1390 qemu_cond_signal(&qemu_cpu_cond);
1391
fa7d1867 1392 /* wait for initial kick-off after machine start */
c28e399c 1393 while (first_cpu->stopped) {
d5f8d613 1394 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1395
1396 /* process any pending work */
bdc44640 1397 CPU_FOREACH(cpu) {
37257942 1398 current_cpu = cpu;
182735ef 1399 qemu_wait_io_event_common(cpu);
8e564b4e 1400 }
0ab07c62 1401 }
296af7c9 1402
6546706d
AB
1403 start_tcg_kick_timer();
1404
c93bbbef
AB
1405 cpu = first_cpu;
1406
e5143e30
AB
1407 /* process any pending work */
1408 cpu->exit_request = 1;
1409
296af7c9 1410 while (1) {
c93bbbef
AB
1411 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1412 qemu_account_warp_timer();
1413
6b8f0187
PB
1414 /* Run the timers here. This is much more efficient than
1415 * waking up the I/O thread and waiting for completion.
1416 */
1417 handle_icount_deadline();
1418
c93bbbef
AB
1419 if (!cpu) {
1420 cpu = first_cpu;
1421 }
1422
e5143e30
AB
1423 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1424
791158d9 1425 atomic_mb_set(&tcg_current_rr_cpu, cpu);
37257942 1426 current_cpu = cpu;
c93bbbef
AB
1427
1428 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1429 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1430
1431 if (cpu_can_run(cpu)) {
1432 int r;
05248382
AB
1433
1434 prepare_icount_for_run(cpu);
1435
c93bbbef 1436 r = tcg_cpu_exec(cpu);
05248382
AB
1437
1438 process_icount_data(cpu);
1439
c93bbbef
AB
1440 if (r == EXCP_DEBUG) {
1441 cpu_handle_guest_debug(cpu);
1442 break;
08e73c48
PK
1443 } else if (r == EXCP_ATOMIC) {
1444 qemu_mutex_unlock_iothread();
1445 cpu_exec_step_atomic(cpu);
1446 qemu_mutex_lock_iothread();
1447 break;
c93bbbef 1448 }
37257942 1449 } else if (cpu->stop) {
c93bbbef
AB
1450 if (cpu->unplug) {
1451 cpu = CPU_NEXT(cpu);
1452 }
1453 break;
1454 }
1455
e5143e30
AB
1456 cpu = CPU_NEXT(cpu);
1457 } /* while (cpu && !cpu->exit_request).. */
1458
791158d9
AB
1459 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1460 atomic_set(&tcg_current_rr_cpu, NULL);
c93bbbef 1461
e5143e30
AB
1462 if (cpu && cpu->exit_request) {
1463 atomic_mb_set(&cpu->exit_request, 0);
1464 }
ac70aafc 1465
db08b687 1466 qemu_tcg_rr_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
c93bbbef 1467 deal_with_unplugged_cpus();
296af7c9
BS
1468 }
1469
9b0605f9 1470 rcu_unregister_thread();
296af7c9
BS
1471 return NULL;
1472}
1473
b0cb0a66
VP
1474static void *qemu_hax_cpu_thread_fn(void *arg)
1475{
1476 CPUState *cpu = arg;
1477 int r;
b3d3a426 1478
9857c2d2 1479 rcu_register_thread();
b3d3a426 1480 qemu_mutex_lock_iothread();
b0cb0a66 1481 qemu_thread_get_self(cpu->thread);
b0cb0a66
VP
1482
1483 cpu->thread_id = qemu_get_thread_id();
1484 cpu->created = true;
1485 cpu->halted = 0;
1486 current_cpu = cpu;
1487
1488 hax_init_vcpu(cpu);
1489 qemu_cond_signal(&qemu_cpu_cond);
1490
9857c2d2 1491 do {
b0cb0a66
VP
1492 if (cpu_can_run(cpu)) {
1493 r = hax_smp_cpu_exec(cpu);
1494 if (r == EXCP_DEBUG) {
1495 cpu_handle_guest_debug(cpu);
1496 }
1497 }
1498
db08b687 1499 qemu_wait_io_event(cpu);
9857c2d2
PB
1500 } while (!cpu->unplug || cpu_can_run(cpu));
1501 rcu_unregister_thread();
b0cb0a66
VP
1502 return NULL;
1503}
1504
c97d6d2c
SAGDR
1505/* The HVF-specific vCPU thread function. This one should only run when the host
1506 * CPU supports the VMX "unrestricted guest" feature. */
1507static void *qemu_hvf_cpu_thread_fn(void *arg)
1508{
1509 CPUState *cpu = arg;
1510
1511 int r;
1512
1513 assert(hvf_enabled());
1514
1515 rcu_register_thread();
1516
1517 qemu_mutex_lock_iothread();
1518 qemu_thread_get_self(cpu->thread);
1519
1520 cpu->thread_id = qemu_get_thread_id();
1521 cpu->can_do_io = 1;
1522 current_cpu = cpu;
1523
1524 hvf_init_vcpu(cpu);
1525
1526 /* signal CPU creation */
1527 cpu->created = true;
1528 qemu_cond_signal(&qemu_cpu_cond);
1529
1530 do {
1531 if (cpu_can_run(cpu)) {
1532 r = hvf_vcpu_exec(cpu);
1533 if (r == EXCP_DEBUG) {
1534 cpu_handle_guest_debug(cpu);
1535 }
1536 }
db08b687 1537 qemu_wait_io_event(cpu);
c97d6d2c
SAGDR
1538 } while (!cpu->unplug || cpu_can_run(cpu));
1539
1540 hvf_vcpu_destroy(cpu);
1541 cpu->created = false;
1542 qemu_cond_signal(&qemu_cpu_cond);
1543 qemu_mutex_unlock_iothread();
8178e637 1544 rcu_unregister_thread();
c97d6d2c
SAGDR
1545 return NULL;
1546}
1547
b0cb0a66
VP
1548#ifdef _WIN32
1549static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1550{
1551}
1552#endif
1553
37257942
AB
1554/* Multi-threaded TCG
1555 *
1556 * In the multi-threaded case each vCPU has its own thread. The TLS
1557 * variable current_cpu can be used deep in the code to find the
1558 * current CPUState for a given thread.
1559 */
1560
1561static void *qemu_tcg_cpu_thread_fn(void *arg)
1562{
1563 CPUState *cpu = arg;
1564
bf51c720
AB
1565 g_assert(!use_icount);
1566
37257942 1567 rcu_register_thread();
3468b59e 1568 tcg_register_thread();
37257942
AB
1569
1570 qemu_mutex_lock_iothread();
1571 qemu_thread_get_self(cpu->thread);
1572
1573 cpu->thread_id = qemu_get_thread_id();
1574 cpu->created = true;
1575 cpu->can_do_io = 1;
1576 current_cpu = cpu;
1577 qemu_cond_signal(&qemu_cpu_cond);
1578
1579 /* process any pending work */
1580 cpu->exit_request = 1;
1581
1582 while (1) {
1583 if (cpu_can_run(cpu)) {
1584 int r;
1585 r = tcg_cpu_exec(cpu);
1586 switch (r) {
1587 case EXCP_DEBUG:
1588 cpu_handle_guest_debug(cpu);
1589 break;
1590 case EXCP_HALTED:
1591 /* during start-up the vCPU is reset and the thread is
1592 * kicked several times. If we don't ensure we go back
1593 * to sleep in the halted state we won't cleanly
1594 * start-up when the vCPU is enabled.
1595 *
1596 * cpu->halted should ensure we sleep in wait_io_event
1597 */
1598 g_assert(cpu->halted);
1599 break;
08e73c48
PK
1600 case EXCP_ATOMIC:
1601 qemu_mutex_unlock_iothread();
1602 cpu_exec_step_atomic(cpu);
1603 qemu_mutex_lock_iothread();
37257942
AB
1604 default:
1605 /* Ignore everything else? */
1606 break;
1607 }
1608 }
1609
37257942 1610 atomic_mb_set(&cpu->exit_request, 0);
db08b687 1611 qemu_wait_io_event(cpu);
9b0605f9 1612 } while (!cpu->unplug || cpu_can_run(cpu));
37257942 1613
9b0605f9
PB
1614 qemu_tcg_destroy_vcpu(cpu);
1615 cpu->created = false;
1616 qemu_cond_signal(&qemu_cpu_cond);
1617 qemu_mutex_unlock_iothread();
1618 rcu_unregister_thread();
37257942
AB
1619 return NULL;
1620}
1621
2ff09a40 1622static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1623{
1624#ifndef _WIN32
1625 int err;
1626
e0c38211
PB
1627 if (cpu->thread_kicked) {
1628 return;
9102deda 1629 }
e0c38211 1630 cpu->thread_kicked = true;
814e612e 1631 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1632 if (err) {
1633 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1634 exit(1);
1635 }
1636#else /* _WIN32 */
b0cb0a66
VP
1637 if (!qemu_cpu_is_self(cpu)) {
1638 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1639 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1640 __func__, GetLastError());
1641 exit(1);
1642 }
1643 }
e0c38211
PB
1644#endif
1645}
ed9164a3 1646
c08d7424 1647void qemu_cpu_kick(CPUState *cpu)
296af7c9 1648{
f5c121b8 1649 qemu_cond_broadcast(cpu->halt_cond);
e0c38211 1650 if (tcg_enabled()) {
791158d9 1651 cpu_exit(cpu);
37257942 1652 /* NOP unless doing single-thread RR */
791158d9 1653 qemu_cpu_kick_rr_cpu();
e0c38211 1654 } else {
b0cb0a66
VP
1655 if (hax_enabled()) {
1656 /*
1657 * FIXME: race condition with the exit_request check in
1658 * hax_vcpu_hax_exec
1659 */
1660 cpu->exit_request = 1;
1661 }
e0c38211
PB
1662 qemu_cpu_kick_thread(cpu);
1663 }
296af7c9
BS
1664}
1665
46d62fac 1666void qemu_cpu_kick_self(void)
296af7c9 1667{
4917cf44 1668 assert(current_cpu);
9102deda 1669 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1670}
1671
60e82579 1672bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1673{
814e612e 1674 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1675}
1676
79e2b9ae 1677bool qemu_in_vcpu_thread(void)
aa723c23 1678{
4917cf44 1679 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1680}
1681
afbe7053
PB
/* Per-thread flag: set while this thread holds the iothread mutex. */
static __thread bool iothread_locked = false;

/* Return true when the calling thread currently holds the iothread mutex. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1688
296af7c9
BS
1689void qemu_mutex_lock_iothread(void)
1690{
8d04fb55
JK
1691 g_assert(!qemu_mutex_iothread_locked());
1692 qemu_mutex_lock(&qemu_global_mutex);
afbe7053 1693 iothread_locked = true;
296af7c9
BS
1694}
1695
1696void qemu_mutex_unlock_iothread(void)
1697{
8d04fb55 1698 g_assert(qemu_mutex_iothread_locked());
afbe7053 1699 iothread_locked = false;
296af7c9
BS
1700 qemu_mutex_unlock(&qemu_global_mutex);
1701}
1702
e8faee06 1703static bool all_vcpus_paused(void)
296af7c9 1704{
bdc44640 1705 CPUState *cpu;
296af7c9 1706
bdc44640 1707 CPU_FOREACH(cpu) {
182735ef 1708 if (!cpu->stopped) {
e8faee06 1709 return false;
0ab07c62 1710 }
296af7c9
BS
1711 }
1712
e8faee06 1713 return true;
296af7c9
BS
1714}
1715
1716void pause_all_vcpus(void)
1717{
bdc44640 1718 CPUState *cpu;
296af7c9 1719
40daca54 1720 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1721 CPU_FOREACH(cpu) {
ebd05fea
DH
1722 if (qemu_cpu_is_self(cpu)) {
1723 qemu_cpu_stop(cpu, true);
1724 } else {
1725 cpu->stop = true;
1726 qemu_cpu_kick(cpu);
1727 }
d798e974
JK
1728 }
1729
296af7c9 1730 while (!all_vcpus_paused()) {
be7d6c57 1731 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1732 CPU_FOREACH(cpu) {
182735ef 1733 qemu_cpu_kick(cpu);
296af7c9
BS
1734 }
1735 }
1736}
1737
2993683b
IM
1738void cpu_resume(CPUState *cpu)
1739{
1740 cpu->stop = false;
1741 cpu->stopped = false;
1742 qemu_cpu_kick(cpu);
1743}
1744
296af7c9
BS
1745void resume_all_vcpus(void)
1746{
bdc44640 1747 CPUState *cpu;
296af7c9 1748
40daca54 1749 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1750 CPU_FOREACH(cpu) {
182735ef 1751 cpu_resume(cpu);
296af7c9
BS
1752 }
1753}
1754
4c055ab5
GZ
1755void cpu_remove(CPUState *cpu)
1756{
1757 cpu->stop = true;
1758 cpu->unplug = true;
1759 qemu_cpu_kick(cpu);
1760}
1761
2c579042
BR
1762void cpu_remove_sync(CPUState *cpu)
1763{
1764 cpu_remove(cpu);
1765 while (cpu->created) {
1766 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1767 }
1768}
1769
4900116e
DDAG
1770/* For temporary buffers for forming a name */
1771#define VCPU_THREAD_NAME_SIZE 16
1772
e5ab30a2 1773static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1774{
4900116e 1775 char thread_name[VCPU_THREAD_NAME_SIZE];
37257942
AB
1776 static QemuCond *single_tcg_halt_cond;
1777 static QemuThread *single_tcg_cpu_thread;
e8feb96f
EC
1778 static int tcg_region_inited;
1779
1780 /*
1781 * Initialize TCG regions--once. Now is a good time, because:
1782 * (1) TCG's init context, prologue and target globals have been set up.
1783 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1784 * -accel flag is processed, so the check doesn't work then).
1785 */
1786 if (!tcg_region_inited) {
1787 tcg_region_inited = 1;
1788 tcg_region_init();
1789 }
4900116e 1790
37257942 1791 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
814e612e 1792 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1793 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1794 qemu_cond_init(cpu->halt_cond);
37257942
AB
1795
1796 if (qemu_tcg_mttcg_enabled()) {
1797 /* create a thread per vCPU with TCG (MTTCG) */
1798 parallel_cpus = true;
1799 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
4900116e 1800 cpu->cpu_index);
37257942
AB
1801
1802 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1803 cpu, QEMU_THREAD_JOINABLE);
1804
1805 } else {
1806 /* share a single thread for all cpus with TCG */
1807 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1808 qemu_thread_create(cpu->thread, thread_name,
1809 qemu_tcg_rr_cpu_thread_fn,
1810 cpu, QEMU_THREAD_JOINABLE);
1811
1812 single_tcg_halt_cond = cpu->halt_cond;
1813 single_tcg_cpu_thread = cpu->thread;
1814 }
1ecf47bf 1815#ifdef _WIN32
814e612e 1816 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1817#endif
61a46217 1818 while (!cpu->created) {
18a85728 1819 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1820 }
296af7c9 1821 } else {
37257942
AB
1822 /* For non-MTTCG cases we share the thread */
1823 cpu->thread = single_tcg_cpu_thread;
1824 cpu->halt_cond = single_tcg_halt_cond;
296af7c9
BS
1825 }
1826}
1827
b0cb0a66
VP
1828static void qemu_hax_start_vcpu(CPUState *cpu)
1829{
1830 char thread_name[VCPU_THREAD_NAME_SIZE];
1831
1832 cpu->thread = g_malloc0(sizeof(QemuThread));
1833 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1834 qemu_cond_init(cpu->halt_cond);
1835
1836 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1837 cpu->cpu_index);
1838 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1839 cpu, QEMU_THREAD_JOINABLE);
1840#ifdef _WIN32
1841 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1842#endif
1843 while (!cpu->created) {
1844 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1845 }
1846}
1847
48a106bd 1848static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1849{
4900116e
DDAG
1850 char thread_name[VCPU_THREAD_NAME_SIZE];
1851
814e612e 1852 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1853 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1854 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1855 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1856 cpu->cpu_index);
1857 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1858 cpu, QEMU_THREAD_JOINABLE);
61a46217 1859 while (!cpu->created) {
18a85728 1860 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1861 }
296af7c9
BS
1862}
1863
c97d6d2c
SAGDR
1864static void qemu_hvf_start_vcpu(CPUState *cpu)
1865{
1866 char thread_name[VCPU_THREAD_NAME_SIZE];
1867
1868 /* HVF currently does not support TCG, and only runs in
1869 * unrestricted-guest mode. */
1870 assert(hvf_enabled());
1871
1872 cpu->thread = g_malloc0(sizeof(QemuThread));
1873 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1874 qemu_cond_init(cpu->halt_cond);
1875
1876 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
1877 cpu->cpu_index);
1878 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
1879 cpu, QEMU_THREAD_JOINABLE);
1880 while (!cpu->created) {
1881 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1882 }
1883}
1884
10a9021d 1885static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1886{
4900116e
DDAG
1887 char thread_name[VCPU_THREAD_NAME_SIZE];
1888
814e612e 1889 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1890 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1891 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1892 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1893 cpu->cpu_index);
1894 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1895 QEMU_THREAD_JOINABLE);
61a46217 1896 while (!cpu->created) {
c7f0f3b1
AL
1897 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1898 }
1899}
1900
c643bed9 1901void qemu_init_vcpu(CPUState *cpu)
296af7c9 1902{
ce3960eb
AF
1903 cpu->nr_cores = smp_cores;
1904 cpu->nr_threads = smp_threads;
f324e766 1905 cpu->stopped = true;
56943e8c
PM
1906
1907 if (!cpu->as) {
1908 /* If the target cpu hasn't set up any address spaces itself,
1909 * give it the default one.
1910 */
12ebc9a7 1911 cpu->num_ases = 1;
80ceb07a 1912 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
56943e8c
PM
1913 }
1914
0ab07c62 1915 if (kvm_enabled()) {
48a106bd 1916 qemu_kvm_start_vcpu(cpu);
b0cb0a66
VP
1917 } else if (hax_enabled()) {
1918 qemu_hax_start_vcpu(cpu);
c97d6d2c
SAGDR
1919 } else if (hvf_enabled()) {
1920 qemu_hvf_start_vcpu(cpu);
c7f0f3b1 1921 } else if (tcg_enabled()) {
e5ab30a2 1922 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1923 } else {
10a9021d 1924 qemu_dummy_start_vcpu(cpu);
0ab07c62 1925 }
296af7c9
BS
1926}
1927
b4a3d965 1928void cpu_stop_current(void)
296af7c9 1929{
4917cf44 1930 if (current_cpu) {
ebd05fea 1931 qemu_cpu_stop(current_cpu, true);
b4a3d965 1932 }
296af7c9
BS
1933}
1934
56983463 1935int vm_stop(RunState state)
296af7c9 1936{
aa723c23 1937 if (qemu_in_vcpu_thread()) {
74892d24 1938 qemu_system_vmstop_request_prepare();
1dfb4dd9 1939 qemu_system_vmstop_request(state);
296af7c9
BS
1940 /*
1941 * FIXME: should not return to device code in case
1942 * vm_stop() has been requested.
1943 */
b4a3d965 1944 cpu_stop_current();
56983463 1945 return 0;
296af7c9 1946 }
56983463
KW
1947
1948 return do_vm_stop(state);
296af7c9
BS
1949}
1950
2d76e823
CI
1951/**
1952 * Prepare for (re)starting the VM.
1953 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1954 * running or in case of an error condition), 0 otherwise.
1955 */
1956int vm_prepare_start(void)
1957{
1958 RunState requested;
1959 int res = 0;
1960
1961 qemu_vmstop_requested(&requested);
1962 if (runstate_is_running() && requested == RUN_STATE__MAX) {
1963 return -1;
1964 }
1965
1966 /* Ensure that a STOP/RESUME pair of events is emitted if a
1967 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1968 * example, according to documentation is always followed by
1969 * the STOP event.
1970 */
1971 if (runstate_is_running()) {
1972 qapi_event_send_stop(&error_abort);
1973 res = -1;
1974 } else {
1975 replay_enable_events();
1976 cpu_enable_ticks();
1977 runstate_set(RUN_STATE_RUNNING);
1978 vm_state_notify(1, RUN_STATE_RUNNING);
1979 }
1980
1981 /* We are sending this now, but the CPUs will be resumed shortly later */
1982 qapi_event_send_resume(&error_abort);
1983 return res;
1984}
1985
/* Start the VM: resume all vCPUs if vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (vm_prepare_start() != 0) {
        return;
    }
    resume_all_vcpus();
}
1992
8a9236f1
LC
1993/* does a state transition even if the VM is already stopped,
1994 current state is forgotten forever */
56983463 1995int vm_stop_force_state(RunState state)
8a9236f1
LC
1996{
1997 if (runstate_is_running()) {
56983463 1998 return vm_stop(state);
8a9236f1
LC
1999 } else {
2000 runstate_set(state);
b2780d32
WC
2001
2002 bdrv_drain_all();
594a45ce
KW
2003 /* Make sure to return an error if the flush in a previous vm_stop()
2004 * failed. */
22af08ea 2005 return bdrv_flush_all();
8a9236f1
LC
2006 }
2007}
2008
9a78eead 2009void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
2010{
2011 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
2012#if defined(cpu_list)
2013 cpu_list(f, cpu_fprintf);
262353cb
BS
2014#endif
2015}
de0b36b6
LC
2016
2017CpuInfoList *qmp_query_cpus(Error **errp)
2018{
afed5a5a
IM
2019 MachineState *ms = MACHINE(qdev_get_machine());
2020 MachineClass *mc = MACHINE_GET_CLASS(ms);
de0b36b6 2021 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 2022 CPUState *cpu;
de0b36b6 2023
bdc44640 2024 CPU_FOREACH(cpu) {
de0b36b6 2025 CpuInfoList *info;
182735ef
AF
2026#if defined(TARGET_I386)
2027 X86CPU *x86_cpu = X86_CPU(cpu);
2028 CPUX86State *env = &x86_cpu->env;
2029#elif defined(TARGET_PPC)
2030 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
2031 CPUPPCState *env = &ppc_cpu->env;
2032#elif defined(TARGET_SPARC)
2033 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
2034 CPUSPARCState *env = &sparc_cpu->env;
2035#elif defined(TARGET_MIPS)
2036 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2037 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
2038#elif defined(TARGET_TRICORE)
2039 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2040 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 2041#endif
de0b36b6 2042
cb446eca 2043 cpu_synchronize_state(cpu);
de0b36b6
LC
2044
2045 info = g_malloc0(sizeof(*info));
2046 info->value = g_malloc0(sizeof(*info->value));
55e5c285 2047 info->value->CPU = cpu->cpu_index;
182735ef 2048 info->value->current = (cpu == first_cpu);
259186a7 2049 info->value->halted = cpu->halted;
58f88d4b 2050 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
9f09e18a 2051 info->value->thread_id = cpu->thread_id;
de0b36b6 2052#if defined(TARGET_I386)
86f4b687 2053 info->value->arch = CPU_INFO_ARCH_X86;
544a3731 2054 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
de0b36b6 2055#elif defined(TARGET_PPC)
86f4b687 2056 info->value->arch = CPU_INFO_ARCH_PPC;
544a3731 2057 info->value->u.ppc.nip = env->nip;
de0b36b6 2058#elif defined(TARGET_SPARC)
86f4b687 2059 info->value->arch = CPU_INFO_ARCH_SPARC;
544a3731
EB
2060 info->value->u.q_sparc.pc = env->pc;
2061 info->value->u.q_sparc.npc = env->npc;
de0b36b6 2062#elif defined(TARGET_MIPS)
86f4b687 2063 info->value->arch = CPU_INFO_ARCH_MIPS;
544a3731 2064 info->value->u.q_mips.PC = env->active_tc.PC;
48e06fe0 2065#elif defined(TARGET_TRICORE)
86f4b687 2066 info->value->arch = CPU_INFO_ARCH_TRICORE;
544a3731 2067 info->value->u.tricore.PC = env->PC;
86f4b687
EB
2068#else
2069 info->value->arch = CPU_INFO_ARCH_OTHER;
de0b36b6 2070#endif
afed5a5a
IM
2071 info->value->has_props = !!mc->cpu_index_to_instance_props;
2072 if (info->value->has_props) {
2073 CpuInstanceProperties *props;
2074 props = g_malloc0(sizeof(*props));
2075 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2076 info->value->props = props;
2077 }
de0b36b6
LC
2078
2079 /* XXX: waiting for the qapi to support GSList */
2080 if (!cur_item) {
2081 head = cur_item = info;
2082 } else {
2083 cur_item->next = info;
2084 cur_item = info;
2085 }
2086 }
2087
2088 return head;
2089}
0cfd6a9a
LC
2090
2091void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2092 bool has_cpu, int64_t cpu_index, Error **errp)
2093{
2094 FILE *f;
2095 uint32_t l;
55e5c285 2096 CPUState *cpu;
0cfd6a9a 2097 uint8_t buf[1024];
0dc9daf0 2098 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
2099
2100 if (!has_cpu) {
2101 cpu_index = 0;
2102 }
2103
151d1322
AF
2104 cpu = qemu_get_cpu(cpu_index);
2105 if (cpu == NULL) {
c6bd8c70
MA
2106 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2107 "a CPU number");
0cfd6a9a
LC
2108 return;
2109 }
2110
2111 f = fopen(filename, "wb");
2112 if (!f) {
618da851 2113 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
2114 return;
2115 }
2116
2117 while (size != 0) {
2118 l = sizeof(buf);
2119 if (l > size)
2120 l = size;
2f4d0f59 2121 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
2122 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2123 " specified", orig_addr, orig_size);
2f4d0f59
AK
2124 goto exit;
2125 }
0cfd6a9a 2126 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 2127 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
2128 goto exit;
2129 }
2130 addr += l;
2131 size -= l;
2132 }
2133
2134exit:
2135 fclose(f);
2136}
6d3962bf
LC
2137
2138void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2139 Error **errp)
2140{
2141 FILE *f;
2142 uint32_t l;
2143 uint8_t buf[1024];
2144
2145 f = fopen(filename, "wb");
2146 if (!f) {
618da851 2147 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
2148 return;
2149 }
2150
2151 while (size != 0) {
2152 l = sizeof(buf);
2153 if (l > size)
2154 l = size;
eb6282f2 2155 cpu_physical_memory_read(addr, buf, l);
6d3962bf 2156 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 2157 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
2158 goto exit;
2159 }
2160 addr += l;
2161 size -= l;
2162 }
2163
2164exit:
2165 fclose(f);
2166}
ab49ab5c
LC
2167
2168void qmp_inject_nmi(Error **errp)
2169{
9cb805fd 2170 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c 2171}
27498bef
ST
2172
2173void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2174{
2175 if (!use_icount) {
2176 return;
2177 }
2178
2179 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2180 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2181 if (icount_align_option) {
2182 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2183 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2184 } else {
2185 cpu_fprintf(f, "Max guest delay NA\n");
2186 cpu_fprintf(f, "Max guest advance NA\n");
2187 }
2188}