/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "qemu/cutils.h"
#include "migration/vmstate.h"
#include "monitor/monitor.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qapi-events-run-state.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"
#include "sysemu/tcg.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "sysemu/hvf.h"
#include "sysemu/whpx.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "qemu/plugin.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qemu/guest-random.h"
#include "tcg/tcg.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/boards.h"
#include "hw/hw.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* Protect fields that can be respectively read outside the
     * BQL, and written from multiple threads.
     */
    QemuSeqLock vm_clock_seqlock;
    QemuSpin vm_clock_lock;

    int16_t cpu_ticks_enabled;

    /* Conversion factor from emulated instructions to virtual clock ticks.  */
    int16_t icount_time_shift;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;

    int64_t vm_clock_warp_start;
    int64_t cpu_clock_offset;

    /* Only written by TCG thread */
    int64_t qemu_icount;

    /* for adjusting icount */
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    QEMUTimer *icount_warp_timer;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;


/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return (cpu->icount_budget -
            (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
}

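/*
 * Worked example (illustrative note, not part of the original source):
 * with icount_budget = 10000, a decrementer reading of u16.low = 300 and
 * icount_extra = 2000, the vCPU has executed 10000 - (300 + 2000) = 7700
 * instructions so far.
 */
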
/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
static void cpu_update_icount_locked(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

    atomic_set_i64(&timers_state.qemu_icount,
                   timers_state.qemu_icount + executed);
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cpu_update_icount_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static int64_t cpu_get_icount_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return atomic_read_i64(&timers_state.qemu_icount);
}

static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw_locked();
    return atomic_read_i64(&timers_state.qemu_icount_bias) +
           cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

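/*
 * Illustrative note (added, not in the original source): the loop above is
 * the standard seqlock read protocol.  A writer bumps the sequence count
 * around every update, so a reader retries until it observes the same
 * sequence number before and after the read, guaranteeing a consistent
 * snapshot without ever blocking the writer.
 */
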
/* Return the virtual CPU time, based on the instruction counter.  */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << atomic_read(&timers_state.icount_time_shift);
}

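/*
 * For illustration (added note): with icount_time_shift = 3 each emulated
 * instruction accounts for 2^3 = 8 ns of virtual time, i.e. the guest
 * appears to run at 125 MIPS; shift = 0 would correspond to 1000 MIPS.
 */
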
static int64_t cpu_get_ticks_locked(void)
{
    int64_t ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Non increasing ticks may happen if the host uses software suspend.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    qemu_spin_lock(&timers_state.vm_clock_lock);
    ticks = cpu_get_ticks_locked();
    qemu_spin_unlock(&timers_state.vm_clock_lock);
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                   cpu_get_clock_locked());
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift + 1);
    }
    last_delta = delta;
    atomic_set_i64(&timers_state.qemu_icount_bias,
                   cur_icount - (timers_state.qemu_icount
                                 << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

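/*
 * Illustrative note (added, not in the original source): when virtual time
 * (cur_icount) drifts ahead of real time by roughly more than the 100 ms
 * ICOUNT_WOBBLE allowance, icount_time_shift drops by one, halving the
 * nanoseconds charged per instruction; falling behind raises the shift,
 * up to MAX_ICOUNT_SHIFT.  The bias is then recomputed so the adjusted
 * clock stays continuous at the moment the shift changes.
 */
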
static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    int shift = atomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}

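/*
 * For illustration (added note): qemu_icount_round() converts a nanosecond
 * budget into instructions, rounding up.  With shift = 3 (8 ns per
 * instruction), a 1000 ns deadline becomes (1000 + 7) >> 3 = 125
 * instructions.
 */
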
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                      QEMU_TIMER_ATTR_ALL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock,
                           &timers_state.vm_clock_lock);
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp);
        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                             &timers_state.vm_clock_lock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount.  */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /* vCPU is sleeping and warp can't be started.
               It is probably a race condition: notification sent
               to vCPU was processed in advance and vCPU went to sleep.
               Therefore we have to wake it up for doing something.  */
            if (replay_has_checkpoint()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            atomic_set_i64(&timers_state.qemu_icount_bias,
                           timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

static bool warp_timer_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_warp_timer != NULL;
}

static bool adjust_timers_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_rt_timer != NULL;
}

static bool shift_state_needed(void *opaque)
{
    return use_icount == 2;
}

/*
 * Subsection for warp timer migration is optional, because it may not be
 * created.
 */
static const VMStateDescription icount_vmstate_warp_timer = {
    .name = "timer/icount/warp_timer",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = warp_timer_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(vm_clock_warp_start, TimersState),
        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription icount_vmstate_adjust_timers = {
    .name = "timer/icount/timers",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = adjust_timers_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription icount_vmstate_shift = {
    .name = "timer/icount/shift",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = shift_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT16(icount_time_shift, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_warp_timer,
        &icount_vmstate_adjust_timers,
        &icount_vmstate_shift,
        NULL
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_UNUSED(8),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    int64_t sleeptime_ns, endtime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    /* Add 1ns to fix double's rounding error (like 0.9999999...) */
    sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
    endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
    while (sleeptime_ns > 0 && !cpu->stop) {
        if (sleeptime_ns > SCALE_MS) {
            qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
                                sleeptime_ns / SCALE_MS);
        } else {
            qemu_mutex_unlock_iothread();
            g_usleep(sleeptime_ns / SCALE_US);
            qemu_mutex_lock_iothread();
        }
        sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }
    atomic_set(&cpu->throttle_thread_scheduled, 0);
}

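/*
 * Worked example (illustrative note, not from the original source): at a
 * throttle percentage of 75%, pct = 0.75 and throttle_ratio = 0.75 / 0.25
 * = 3, so the vCPU sleeps 3 * 10 ms = 30 ms for every 10 ms timeslice of
 * execution, leaving it runnable for about 25% of wall-clock time.
 */
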
static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

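/*
 * Illustrative note (added): the tick period stretches with the throttle
 * level so each period still contains one 10 ms run timeslice; at
 * pct = 0.5 the timer re-arms every 10 ms / 0.5 = 20 ms.
 */
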
void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    qemu_spin_init(&timers_state.vm_clock_lock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;

    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return;
    }

    if (strcmp(option, "auto") != 0) {
        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
            error_setg(errp, "icount: Invalid shift value");
            return;
        }
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
        return;
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return;
    }

    icount_sleep = sleep;
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        timers_state.icount_time_shift = time_shift;
        use_icount = 1;
        return;
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    timers_state.icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

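/*
 * Usage sketch (added note, not part of the original source): these options
 * correspond to QEMU's -icount command line, e.g.
 *
 *     qemu-system-x86_64 -icount shift=7,sleep=off ...   # fixed 2^7 ns/insn
 *     qemu-system-x86_64 -icount shift=auto,align=on ... # adaptive shift
 *
 * shift=N selects use_icount = 1; shift=auto selects the adaptive
 * use_icount = 2 mode driven by the two adjustment timers armed above.
 */
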
/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU.  If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

/* Kick the currently round-robin scheduled vCPU to next */
static void qemu_cpu_kick_rr_next_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

/* Kick all RR vCPUs */
static void qemu_cpu_kick_rr_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_exit(cpu);
    };
}

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (qemu_in_vcpu_thread()) {
        /* A CPU is currently running; kick it back out to the
         * tcg_cpu_exec() loop so it will recalculate its
         * icount deadline immediately.
         */
        qemu_cpu_kick(current_cpu);
    } else if (first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         * If we have no CPUs at all for some reason, we don't
         * need to do anything.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_next_cpu();
}

static void start_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
    }
    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
        timer_del(tcg_kick_vcpu_timer);
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
        /* TODO: move to cpu_synchronize_state() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_state(cpu);
        }
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
        /* TODO: move to cpu_synchronize_post_reset() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_reset(cpu);
        }
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
        /* TODO: move to cpu_synchronize_post_init() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_init(cpu);
        }
    }
}

void cpu_synchronize_all_pre_loadvm(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_pre_loadvm(cpu);
    }
}

static int do_vm_stop(RunState state, bool send_stop)
{
    int ret = 0;

    if (runstate_is_running()) {
        runstate_set(state);
        cpu_disable_ticks();
        pause_all_vcpus();
        vm_state_notify(0, state);
        if (send_stop) {
            qapi_event_send_stop();
        }
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

/* Special vm_stop() variant for terminating the process.  Historically clients
 * did not expect a QMP STOP event and so we need to retain compatibility.
 */
int vm_shutdown(void)
{
    return do_vm_stop(RUN_STATE_SHUTDOWN, false);
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
    g_assert(qemu_cpu_is_self(cpu));
    cpu->stop = false;
    cpu->stopped = true;
    if (exit) {
        cpu_exit(cpu);
    }
    qemu_cond_broadcast(&qemu_pause_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        qemu_cpu_stop(cpu, false);
    }
    process_queued_cpu_work(cpu);
}

static void qemu_tcg_rr_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        stop_tcg_kick_timer();
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_wait_io_event(CPUState *cpu)
{
    bool slept = false;

    while (cpu_thread_is_idle(cpu)) {
        if (!slept) {
            slept = true;
            qemu_plugin_vcpu_idle_cb(cpu);
        }
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }
    if (slept) {
        qemu_plugin_vcpu_resume_cb(cpu);
    }

#ifdef _WIN32
    /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
    if (!tcg_enabled()) {
        SleepEx(0, TRUE);
    }
#endif
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        error_report("kvm_init_vcpu failed: %s", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    error_report("qtest is not supported under Windows");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug);

    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        /*
         * Include all the timers, because they may need attention.
         * Too long CPU execution may create unnecessary delay in UI.
         */
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                              QEMU_TIMER_ATTR_ALL);
        /* Check realtime timers, because they help with input processing */
        deadline = qemu_soonest_timeout(deadline,
                                        qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
                                                                   QEMU_TIMER_ATTR_ALL));

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

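/*
 * Illustrative note (added): the clamp above bounds a single execution
 * slice; e.g. with no virtual timer pending, deadline becomes INT32_MAX ns
 * (about 2.1 s), which qemu_icount_round() then converts into an
 * instruction budget for the next tcg_cpu_exec() run.
 */
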
static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                      QEMU_TIMER_ATTR_ALL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu_neg(cpu)->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;

        replay_mutex_lock();
    }
}

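/*
 * Worked example (illustrative note, not from the original source): a
 * budget of 100000 instructions is split into the 16-bit decrementer,
 * u16.low = MIN(0xffff, 100000) = 65535, with the remaining 34465
 * instructions kept in icount_extra and refilled as the decrementer
 * runs out.
 */
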
static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu_neg(cpu)->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();

        replay_mutex_unlock();
    }
}


static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

    assert(tcg_enabled());
#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
    atomic_set(&tcg_ctx->prof.cpu_exec_time,
               tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn. If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    assert(tcg_enabled());
    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        qemu_mutex_unlock_iothread();
        replay_mutex_lock();
        qemu_mutex_lock_iothread();
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        replay_mutex_unlock();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                qemu_mutex_unlock_iothread();
                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);
                qemu_mutex_lock_iothread();

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        if (use_icount && all_cpu_threads_idle()) {
            /*
             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
             * in the main_loop, wake it up in order to start the warp timer.
             */
            qemu_notify_event();
        }

        qemu_tcg_rr_wait_io_event();
        deal_with_unplugged_cpus();
    }

    rcu_unregister_thread();
    return NULL;
}

static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();
    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));
    rcu_unregister_thread();
    return NULL;
}

/* The HVF-specific vCPU thread function. This one should only run when the host
 * CPU supports the VMX "unrestricted guest" feature. */
static void *qemu_hvf_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    int r;

    assert(hvf_enabled());

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    hvf_init_vcpu(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = hvf_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    hvf_vcpu_destroy(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

static void *qemu_whpx_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = whpx_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = whpx_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
        qemu_wait_io_event_common(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    whpx_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif

/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread. The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    assert(tcg_enabled());
    g_assert(!use_icount);

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    /* process any pending work */
    cpu->exit_request = 1;

    do {
        if (cpu_can_run(cpu)) {
            int r;
            qemu_mutex_unlock_iothread();
            r = tcg_cpu_exec(cpu);
            qemu_mutex_lock_iothread();
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times. If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
            default:
                /* Ignore everything else? */
                break;
            }
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_tcg_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

2ff09a40 1779static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1780{
1781#ifndef _WIN32
1782 int err;
1783
e0c38211
PB
1784 if (cpu->thread_kicked) {
1785 return;
9102deda 1786 }
e0c38211 1787 cpu->thread_kicked = true;
814e612e 1788 err = pthread_kill(cpu->thread->thread, SIG_IPI);
d455ebc4 1789 if (err && err != ESRCH) {
cc015e9a
PB
1790 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1791 exit(1);
1792 }
1793#else /* _WIN32 */
b0cb0a66 1794 if (!qemu_cpu_is_self(cpu)) {
19306806
JTV
1795 if (whpx_enabled()) {
1796 whpx_vcpu_kick(cpu);
1797 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
b0cb0a66
VP
1798 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1799 __func__, GetLastError());
1800 exit(1);
1801 }
1802 }
e0c38211
PB
1803#endif
1804}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        if (qemu_tcg_mttcg_enabled()) {
            cpu_exit(cpu);
        } else {
            qemu_cpu_kick_rr_cpus();
        }
    } else {
        if (hax_enabled()) {
            /*
             * FIXME: race condition with the exit_request check in
             * hax_vcpu_hax_exec
             */
            cpu->exit_request = 1;
        }
        qemu_cpu_kick_thread(cpu);
    }
}
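
/*
 * Typical cross-thread use (sketch, mirroring pause_all_vcpus() below):
 * set a request flag first, then kick so the target vCPU drops out of its
 * execution loop and observes the flag in qemu_wait_io_event():
 *
 *     cpu->stop = true;
 *     qemu_cpu_kick(cpu);
 */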

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

/*
 * The BQL is taken from so many places that it is worth profiling the
 * callers directly, instead of funneling them all through a single function.
 */
void qemu_mutex_lock_iothread_impl(const char *file, int line)
{
    QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);

    g_assert(!qemu_mutex_iothread_locked());
    bql_lock(&qemu_global_mutex, file, line);
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

void qemu_cond_wait_iothread(QemuCond *cond)
{
    qemu_cond_wait(cond, &qemu_global_mutex);
}
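
/*
 * Illustrative BQL pattern (a sketch, not part of cpus.c): callers use the
 * qemu_mutex_lock_iothread() macro, which feeds __FILE__/__LINE__ to the
 * _impl function above so lock profiling can attribute each acquisition:
 *
 *     if (!qemu_mutex_iothread_locked()) {
 *         qemu_mutex_lock_iothread();
 *         // ... touch device or global state under the BQL ...
 *         qemu_mutex_unlock_iothread();
 *     }
 */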

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        if (qemu_cpu_is_self(cpu)) {
            qemu_cpu_stop(cpu, true);
        } else {
            cpu->stop = true;
            qemu_cpu_kick(cpu);
        }
    }

    /* We need to drop the replay_lock so any vCPU threads woken up
     * can finish their replay tasks
     */
    replay_mutex_unlock();

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }

    qemu_mutex_unlock_iothread();
    replay_mutex_lock();
    qemu_mutex_lock_iothread();
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    if (!runstate_is_running()) {
        return;
    }

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}
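
/*
 * Sketch of the usual pairing from the main/monitor thread: quiesce every
 * vCPU around an operation that must see stable guest state, e.g.:
 *
 *     pause_all_vcpus();
 *     // ... snapshot, migrate, or mutate global state safely ...
 *     resume_all_vcpus();
 */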

void cpu_remove_sync(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
    qemu_mutex_unlock_iothread();
    qemu_thread_join(cpu->thread);
    qemu_mutex_lock_iothread();
}
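
/*
 * Hedged usage note: cpu_remove_sync() is the synchronous tail of vCPU
 * hot-unplug; a board's unplug callback would typically end with something
 * like (sketch of caller code, not part of this file):
 *
 *     cpu_remove_sync(CPU(dev));
 */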

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;
    static int tcg_region_inited;

    assert(tcg_enabled());
    /*
     * Initialize TCG regions--once. Now is a good time, because:
     * (1) TCG's init context, prologue and target globals have been set up.
     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
     *     -accel flag is processed, so the check doesn't work then).
     */
    if (!tcg_region_inited) {
        tcg_region_inited = 1;
        tcg_region_init();
    }

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                     cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    } else {
        /* For non-MTTCG cases we share the thread */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
        cpu->thread_id = first_cpu->thread_id;
        cpu->can_do_io = 1;
        cpu->created = true;
    }
}

static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
}

static void qemu_hvf_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    /* HVF currently does not support TCG, and only runs in
     * unrestricted-guest mode. */
    assert(hvf_enabled());

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
}

static void qemu_whpx_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
}

void qemu_init_vcpu(CPUState *cpu)
{
    MachineState *ms = MACHINE(qdev_get_machine());

    cpu->nr_cores = ms->smp.cores;
    cpu->nr_threads = ms->smp.threads;
    cpu->stopped = true;
    cpu->random_seed = qemu_guest_random_seed_thread_part1();

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (hvf_enabled()) {
        qemu_hvf_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else if (whpx_enabled()) {
        qemu_whpx_start_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }

    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
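
/*
 * Hedged example: qemu_init_vcpu() is invoked from each target's realize
 * path once the CPU object is fully configured; roughly (sketch of target
 * code with a hypothetical function name, not part of this file):
 *
 *     static void my_cpu_realizefn(DeviceState *dev, Error **errp)
 *     {
 *         CPUState *cs = CPU(dev);
 *
 *         // ... target-specific feature setup ...
 *         qemu_init_vcpu(cs);
 *         // ... reset, parent realize, etc. ...
 *     }
 */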

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = true;
        cpu_exit(current_cpu);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state, true);
}

/**
 * Prepare for (re)starting the VM.
 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
 * running or in case of an error condition), 0 otherwise.
 */
int vm_prepare_start(void)
{
    RunState requested;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending. The BLOCK_IO_ERROR event, for
     * example, is by its documentation always followed by the
     * STOP event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop();
        qapi_event_send_resume();
        return -1;
    }

    /* We are sending this now, but the CPUs will be resumed shortly after */
    qapi_event_send_resume();

    cpu_enable_ticks();
    runstate_set(RUN_STATE_RUNNING);
    vm_state_notify(1, RUN_STATE_RUNNING);
    return 0;
}

void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}
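
/*
 * Illustrative pairing (sketch): the monitor's "stop" and "cont" commands
 * ultimately reduce to these helpers, roughly:
 *
 *     vm_stop(RUN_STATE_PAUSED);   // "stop"
 *     vm_start();                  // "cont", after any wakeup handling
 */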

/* does a state transition even if the VM is already stopped;
 * the current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

void list_cpus(const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list();
#endif
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
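
/*
 * Example QMP invocation for the command above (wire format; the values and
 * filename are illustrative only):
 *
 *   { "execute": "memsave",
 *     "arguments": { "val": 4096, "size": 1024,
 *                    "filename": "/tmp/virtual-mem-dump" } }
 *
 * qmp_pmemsave() below is the physical-address counterpart ("pmemsave").
 */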

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(void)
{
    if (!use_icount) {
        return;
    }

    qemu_printf("Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount()) / SCALE_MS);
    if (icount_align_option) {
        qemu_printf("Max guest delay %"PRIi64" ms\n",
                    -max_delay / SCALE_MS);
        qemu_printf("Max guest advance %"PRIi64" ms\n",
                    max_advance / SCALE_MS);
    } else {
        qemu_printf("Max guest delay NA\n");
        qemu_printf("Max guest advance NA\n");
    }
}
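
/*
 * Sample output (illustrative numbers only; nothing is printed unless
 * icount is in use, and the two "Max guest" lines carry real values only
 * when icount alignment is enabled):
 *
 *   Host - Guest clock 301 ms
 *   Max guest delay 12 ms
 *   Max guest advance 3 ms
 */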