/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "monitor/monitor.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qapi-events-run-state.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"
#include "sysemu/tcg.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "sysemu/hvf.h"
#include "sysemu/whpx.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qemu/guest-random.h"
#include "tcg.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
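
/* Throttling works by forcing each vCPU thread to sleep for a slice of real
 * time.  The 99% cap exists because the sleep ratio pct / (1 - pct) used in
 * cpu_throttle_thread() below diverges as pct approaches 1, so a complete
 * stop cannot be expressed. */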

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
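
/* One executed instruction advances the virtual clock by 2^shift ns (see
 * cpu_icount_to_ns() below), so the maximum shift of 10 corresponds to
 * ~1024 ns per instruction, i.e. the ~1 MIPS floor mentioned above. */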

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* Protect fields that can be respectively read outside the
     * BQL, and written from multiple threads.
     */
    QemuSeqLock vm_clock_seqlock;
    QemuSpin vm_clock_lock;

    int16_t cpu_ticks_enabled;

    /* Conversion factor from emulated instructions to virtual clock ticks. */
    int16_t icount_time_shift;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;

    int64_t vm_clock_warp_start;
    int64_t cpu_clock_offset;

    /* Only written by TCG thread */
    int64_t qemu_icount;

    /* for adjusting icount */
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    QEMUTimer *icount_warp_timer;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *  - atomic instructions
 *  - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 *  - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 *  - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}

static bool default_mttcg_enabled(void)
{
    if (use_icount || TCG_OVERSIZED_GUEST) {
        return false;
    } else {
#ifdef TARGET_SUPPORTS_MTTCG
        return check_tcg_memory_orders_compatible();
#else
        return false;
#endif
    }
}

void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > host's");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                warn_report("Guest not yet converted to MTTCG - "
                            "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    warn_report("Guest expects a stronger memory ordering "
                                "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}
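
/* The thread mode is typically selected on the command line, e.g.
 * "-accel tcg,thread=multi" or "-accel tcg,thread=single". */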
946fb27c 234
e4cd9657
AB
235/* The current number of executed instructions is based on what we
236 * originally budgeted minus the current state of the decrementing
237 * icount counters in extra/u16.low.
238 */
239static int64_t cpu_get_icount_executed(CPUState *cpu)
240{
5e140196
RH
241 return (cpu->icount_budget -
242 (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
e4cd9657
AB
243}
244
/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
static void cpu_update_icount_locked(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

    atomic_set_i64(&timers_state.qemu_icount,
                   timers_state.qemu_icount + executed);
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cpu_update_icount_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static int64_t cpu_get_icount_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return atomic_read_i64(&timers_state.qemu_icount);
}

static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw_locked();
    return atomic_read_i64(&timers_state.qemu_icount_bias) +
           cpu_icount_to_ns(icount);
}
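
/* Readers that run outside the BQL use the seqlock read/retry idiom: retry
 * the read section until it completes without an intervening writer having
 * bumped the sequence count. */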
int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << atomic_read(&timers_state.icount_time_shift);
}
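
/* For example, with the default shift of 3 (see configure_icount() below),
 * every instruction accounts for 8 ns of virtual time, i.e. a nominal
 * 125 MIPS guest. */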

static int64_t cpu_get_ticks_locked(void)
{
    int64_t ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Non-increasing ticks may happen if the host uses software suspend. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    qemu_spin_lock(&timers_state.vm_clock_lock);
    ticks = cpu_get_ticks_locked();
    qemu_spin_unlock(&timers_state.vm_clock_lock);
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift + 1);
    }
    last_delta = delta;
    atomic_set_i64(&timers_state.qemu_icount_bias,
                   cur_icount - (timers_state.qemu_icount
                                 << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}
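
/* Round a nanosecond count up to a whole number of (1 << shift) ns
 * instruction ticks, e.g. for shift 3 a 9 ns deadline rounds to
 * (9 + 7) >> 3 = 2 instructions. */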
static int64_t qemu_icount_round(int64_t count)
{
    int shift = atomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock,
                           &timers_state.vm_clock_lock);
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp);
        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                             &timers_state.vm_clock_lock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount.  */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /* vCPU is sleeping and warp can't be started.
               It is probably a race condition: notification sent
               to vCPU was processed in advance and vCPU went to sleep.
               Therefore we have to wake it up to do something.  */
            if (replay_has_checkpoint()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            atomic_set_i64(&timers_state.qemu_icount_bias,
                           timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

static bool warp_timer_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_warp_timer != NULL;
}

static bool adjust_timers_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_rt_timer != NULL;
}

/*
 * Subsection for warp timer migration is optional, because it may not be
 * created.
 */
static const VMStateDescription icount_vmstate_warp_timer = {
    .name = "timer/icount/warp_timer",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = warp_timer_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(vm_clock_warp_start, TimersState),
        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription icount_vmstate_adjust_timers = {
    .name = "timer/icount/timers",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = adjust_timers_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_warp_timer,
        &icount_vmstate_adjust_timers,
        NULL
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_UNUSED(8),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}
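
/* A worked example of the rearm period above: at pct = 0.5 the timer
 * re-fires every 10 ms / (1 - 0.5) = 20 ms, so each cycle pairs ~10 ms of
 * vCPU run time with the ~10 ms sleep injected by cpu_throttle_thread(). */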

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    qemu_spin_init(&timers_state.vm_clock_lock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        timers_state.icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    timers_state.icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}
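
/* These options come from the command line, e.g. "-icount shift=7" for a
 * fixed 2^7 ns per instruction, or "-icount shift=auto" to let
 * icount_adjust() track real time. */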

/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single-threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted once any
 * of them becomes runnable again.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}
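
/* With TCG_KICK_PERIOD at a tenth of a second, a busy vCPU is preempted in
 * favour of the next one after at most 100 ms of QEMU_CLOCK_VIRTUAL time. */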

/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (qemu_in_vcpu_thread()) {
        /* A CPU is currently running; kick it back out to the
         * tcg_cpu_exec() loop so it will recalculate its
         * icount deadline immediately.
         */
        qemu_cpu_kick(current_cpu);
    } else if (first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         * If we have no CPUs at all for some reason, we don't
         * need to do anything.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}

static void start_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
    }
    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
        timer_del(tcg_kick_vcpu_timer);
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
        /* TODO: move to cpu_synchronize_state() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_state(cpu);
        }
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
        /* TODO: move to cpu_synchronize_post_reset() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_reset(cpu);
        }
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
        /* TODO: move to cpu_synchronize_post_init() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_init(cpu);
        }
    }
}

void cpu_synchronize_all_pre_loadvm(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_pre_loadvm(cpu);
    }
}

static int do_vm_stop(RunState state, bool send_stop)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        if (send_stop) {
            qapi_event_send_stop();
        }
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

/* Special vm_stop() variant for terminating the process.  Historically clients
 * did not expect a QMP STOP event and so we need to retain compatibility.
 */
int vm_shutdown(void)
{
    return do_vm_stop(RUN_STATE_SHUTDOWN, false);
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
    g_assert(qemu_cpu_is_self(cpu));
    cpu->stop = false;
    cpu->stopped = true;
    if (exit) {
        cpu_exit(cpu);
    }
    qemu_cond_broadcast(&qemu_pause_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        qemu_cpu_stop(cpu, false);
    }
    process_queued_cpu_work(cpu);
}

static void qemu_tcg_rr_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        stop_tcg_kick_timer();
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

#ifdef _WIN32
    /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
    if (!tcg_enabled()) {
        SleepEx(0, TRUE);
    }
#endif
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        error_report("kvm_init_vcpu failed: %s", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}
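
/* The accelerator-specific thread functions below all follow broadly the
 * same lifecycle as the KVM one above: register with RCU, announce creation
 * under the BQL, alternate between running the guest and waiting for I/O
 * events, and tear down once the vCPU is unplugged. */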

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    error_report("qtest is not supported under Windows");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug);

    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu_neg(cpu)->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;

        replay_mutex_lock();
    }
}

static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu_neg(cpu)->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();

        replay_mutex_unlock();
    }
}
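
/* The budget is split this way because the TCG fast path decrements the
 * 16-bit icount_decr.u16.low counter: at most 0xffff instructions can be
 * scheduled there, and the remainder is parked in icount_extra. */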

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

    assert(tcg_enabled());
#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
    atomic_set(&tcg_ctx->prof.cpu_exec_time,
               tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn. If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    assert(tcg_enabled());
    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        qemu_mutex_unlock_iothread();
        replay_mutex_lock();
        qemu_mutex_lock_iothread();
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        replay_mutex_unlock();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                qemu_mutex_unlock_iothread();
                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);
                qemu_mutex_lock_iothread();

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        if (use_icount && all_cpu_threads_idle()) {
            /*
             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
             * in the main_loop, wake it up in order to start the warp timer.
             */
            qemu_notify_event();
        }

        qemu_tcg_rr_wait_io_event();
        deal_with_unplugged_cpus();
    }

    rcu_unregister_thread();
    return NULL;
}

static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();
    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));
    rcu_unregister_thread();
    return NULL;
}

/* The HVF-specific vCPU thread function. This one should only run when the host
 * CPU supports the VMX "unrestricted guest" feature. */
static void *qemu_hvf_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    int r;

    assert(hvf_enabled());

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    hvf_init_vcpu(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = hvf_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    hvf_vcpu_destroy(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

static void *qemu_whpx_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = whpx_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = whpx_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
        qemu_wait_io_event_common(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    whpx_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif

/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread. The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    assert(tcg_enabled());
    g_assert(!use_icount);

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    /* process any pending work */
    cpu->exit_request = 1;

    do {
        if (cpu_can_run(cpu)) {
            int r;
            qemu_mutex_unlock_iothread();
            r = tcg_cpu_exec(cpu);
            qemu_mutex_lock_iothread();
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times. If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
                /* fall through */
            default:
                /* Ignore everything else? */
                break;
            }
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_tcg_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}
1779
2ff09a40 1780static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1781{
1782#ifndef _WIN32
1783 int err;
1784
e0c38211
PB
1785 if (cpu->thread_kicked) {
1786 return;
9102deda 1787 }
e0c38211 1788 cpu->thread_kicked = true;
814e612e 1789 err = pthread_kill(cpu->thread->thread, SIG_IPI);
d455ebc4 1790 if (err && err != ESRCH) {
cc015e9a
PB
1791 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1792 exit(1);
1793 }
1794#else /* _WIN32 */
b0cb0a66 1795 if (!qemu_cpu_is_self(cpu)) {
19306806
JTV
1796 if (whpx_enabled()) {
1797 whpx_vcpu_kick(cpu);
1798 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
b0cb0a66
VP
1799 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1800 __func__, GetLastError());
1801 exit(1);
1802 }
1803 }
e0c38211
PB
1804#endif
1805}
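
/* Note on the kick mechanisms above (a summary, not code from this
 * file): on POSIX hosts, SIG_IPI interrupts a vCPU thread blocked in
 * the hypervisor (e.g. in the KVM_RUN ioctl) so it returns to QEMU; on
 * Windows, whpx_vcpu_kick() asks WHPX to cancel the vCPU run, while
 * the dummy APC is presumably only there to wake the thread from an
 * alertable wait. */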

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        cpu_exit(cpu);
        /* NOP unless doing single-thread RR */
        qemu_cpu_kick_rr_cpu();
    } else {
        if (hax_enabled()) {
            /*
             * FIXME: race condition with the exit_request check in
             * hax_vcpu_hax_exec
             */
            cpu->exit_request = 1;
        }
        qemu_cpu_kick_thread(cpu);
    }
}
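
/* For reference (behaviour summarized from elsewhere in QEMU, not from
 * this file): the halt_cond broadcast wakes a vCPU sleeping in
 * qemu_wait_io_event(), while cpu_exit() flags a running TCG vCPU so
 * that it drops out of the translated-code execution loop soon after. */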

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

/*
 * The BQL is taken from so many places that it is worth profiling the
 * callers directly, instead of funneling them all through a single function.
 */
void qemu_mutex_lock_iothread_impl(const char *file, int line)
{
    QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);

    g_assert(!qemu_mutex_iothread_locked());
    bql_lock(&qemu_global_mutex, file, line);
    iothread_locked = true;
}
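
/* Callers normally reach the _impl function through the
 * qemu_mutex_lock_iothread() macro, which (in include/qemu/main-loop.h)
 * is expected to expand to roughly:
 *
 *     #define qemu_mutex_lock_iothread() \
 *         qemu_mutex_lock_iothread_impl(__FILE__, __LINE__)
 *
 * so that lock profiling can attribute contention to each call site. */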

void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        if (qemu_cpu_is_self(cpu)) {
            qemu_cpu_stop(cpu, true);
        } else {
            cpu->stop = true;
            qemu_cpu_kick(cpu);
        }
    }

    /* We need to drop the replay_lock so any vCPU threads woken up
     * can finish their replay tasks
     */
    replay_mutex_unlock();

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }

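    /* Retake the replay lock while respecting what appears to be the
     * lock ordering here (replay lock taken outside the BQL): drop the
     * BQL, take the replay lock, then reacquire the BQL. */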
    qemu_mutex_unlock_iothread();
    replay_mutex_lock();
    qemu_mutex_lock_iothread();
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

void cpu_remove_sync(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
    qemu_mutex_unlock_iothread();
    qemu_thread_join(cpu->thread);
    qemu_mutex_lock_iothread();
}

/* Size of the temporary buffers used to form vCPU thread names */
#define VCPU_THREAD_NAME_SIZE 16
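/* The value 16 presumably matches Linux's pthread_setname_np() limit
 * of 16 bytes, including the terminating NUL. */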

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;
    static int tcg_region_inited;

    assert(tcg_enabled());
    /*
     * Initialize TCG regions--once. Now is a good time, because:
     * (1) TCG's init context, prologue and target globals have been set up.
     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
     *     -accel flag is processed, so the check doesn't work then).
     */
    if (!tcg_region_inited) {
        tcg_region_inited = 1;
        tcg_region_init();
    }

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                     cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    } else {
        /* For non-MTTCG cases we share the thread */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
        cpu->thread_id = first_cpu->thread_id;
        cpu->can_do_io = 1;
        cpu->created = true;
    }
}
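
/* The MTTCG-vs-round-robin choice reported by qemu_tcg_mttcg_enabled()
 * is controlled by the TCG accelerator's "thread" property (e.g.
 * -accel tcg,thread=multi or thread=single on the command line); the
 * exact option spelling is from memory and may vary by QEMU version. */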

static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
}

static void qemu_hvf_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    /* HVF currently does not support TCG, and only runs in
     * unrestricted-guest mode. */
    assert(hvf_enabled());

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
}

static void qemu_whpx_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    cpu->random_seed = qemu_guest_random_seed_thread_part1();

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (hvf_enabled()) {
        qemu_hvf_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else if (whpx_enabled()) {
        qemu_whpx_start_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }

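    /* Wait for the accelerator's vCPU thread function to mark the vCPU
     * as created and signal qemu_cpu_cond. */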
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = true;
        cpu_exit(current_cpu);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state, true);
}

/**
 * Prepare for (re)starting the VM.
 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
 * running or in case of an error condition), 0 otherwise.
 */
int vm_prepare_start(void)
{
    RunState requested;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending. The BLOCK_IO_ERROR event, for
     * example, is documented as always being followed by the STOP
     * event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop();
        qapi_event_send_resume();
        return -1;
    }

    /* We are sending this now, but the CPUs will be resumed shortly after */
    qapi_event_send_resume();

    replay_enable_events();
    cpu_enable_ticks();
    runstate_set(RUN_STATE_RUNNING);
    vm_state_notify(1, RUN_STATE_RUNNING);
    return 0;
}
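
/* vm_start() below is the straightforward caller: prepare, then resume
 * all vCPUs. Keeping vm_prepare_start() separate lets a caller do extra
 * work between the RUN_STATE_RUNNING notification and actually resuming
 * the vCPUs. */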

void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}

/* Does a state transition even if the VM is already stopped; the
   current state is forgotten forever. */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

void list_cpus(const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list();
#endif
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
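        /* the final 0 argument selects a read (is_write == 0) */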
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
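
/* Note the difference between the two savers: qmp_memsave() walks a
 * given vCPU's virtual address space via cpu_memory_rw_debug(), while
 * qmp_pmemsave() reads guest physical memory directly through
 * cpu_physical_memory_read(). */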

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(void)
{
    if (!use_icount) {
        return;
    }

    qemu_printf("Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount()) / SCALE_MS);
    if (icount_align_option) {
        qemu_printf("Max guest delay %"PRIi64" ms\n",
                    -max_delay / SCALE_MS);
        qemu_printf("Max guest advance %"PRIi64" ms\n",
                    max_advance / SCALE_MS);
    } else {
        qemu_printf("Max guest delay NA\n");
        qemu_printf("Max guest advance NA\n");
    }
}