/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "qemu/cutils.h"
#include "migration/vmstate.h"
#include "monitor/monitor.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qapi-events-run-state.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"
#include "sysemu/tcg.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "sysemu/hvf.h"
#include "sysemu/whpx.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "qemu/plugin.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qemu/guest-random.h"
#include "tcg/tcg.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/boards.h"
#include "hw/hw.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* Protect fields that can be respectively read outside the
     * BQL, and written from multiple threads.
     */
    QemuSeqLock vm_clock_seqlock;
    QemuSpin vm_clock_lock;

    int16_t cpu_ticks_enabled;

    /* Conversion factor from emulated instructions to virtual clock ticks. */
    int16_t icount_time_shift;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;

    int64_t vm_clock_warp_start;
    int64_t cpu_clock_offset;

    /* Only written by TCG thread */
    int64_t qemu_icount;

    /* for adjusting icount */
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    QEMUTimer *icount_warp_timer;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return (cpu->icount_budget -
            (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
static void cpu_update_icount_locked(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

    atomic_set_i64(&timers_state.qemu_icount,
                   timers_state.qemu_icount + executed);
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cpu_update_icount_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static int64_t cpu_get_icount_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return atomic_read_i64(&timers_state.qemu_icount);
}

static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw_locked();
    return atomic_read_i64(&timers_state.qemu_icount_bias) +
        cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

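/*
 * Illustration (not part of the build): readers of the seqlock-protected
 * timer state all follow the pattern used by cpu_get_icount_raw() above:
 *
 *     unsigned seq;
 *     int64_t value;
 *     do {
 *         seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 *         value = ...read the protected fields...;
 *     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
 *
 * Readers never block; writers take vm_clock_lock and bump the sequence
 * around their update, forcing concurrent readers to retry.
 */
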
/* Return the virtual CPU time, based on the instruction counter. */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << atomic_read(&timers_state.icount_time_shift);
}

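/*
 * Worked example (for illustration): with icount_time_shift == 3, each
 * executed instruction advances the virtual clock by 1 << 3 = 8 ns, i.e.
 * the guest appears to execute at 125 MIPS.  icount_adjust() below nudges
 * the shift up or down so that virtual time tracks real time.
 */
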
static int64_t cpu_get_ticks_locked(void)
{
    int64_t ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Non-increasing ticks may happen if the host uses software suspend. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    qemu_spin_lock(&timers_state.vm_clock_lock);
    ticks = cpu_get_ticks_locked();
    qemu_spin_unlock(&timers_state.vm_clock_lock);
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift + 1);
    }
    last_delta = delta;
    atomic_set_i64(&timers_state.qemu_icount_bias,
                   cur_icount - (timers_state.qemu_icount
                                 << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

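/*
 * Note on the bias update at the end of icount_adjust(): changing
 * icount_time_shift alone would make (qemu_icount << shift) jump, so
 * qemu_icount_bias is recomputed in the same critical section to keep
 * cpu_get_icount_locked() == bias + (icount << shift) continuous across
 * the adjustment.
 */
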
static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    int shift = atomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}

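/*
 * qemu_icount_round() converts a nanosecond deadline into an instruction
 * budget, rounding up.  Worked example: with shift == 3 (8 ns per insn),
 * a 100 ns deadline yields (100 + 7) >> 3 = 13 instructions, so execution
 * never stops short of the deadline.
 */
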
static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                      QEMU_TIMER_ATTR_ALL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock,
                           &timers_state.vm_clock_lock);
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp);
        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                             &timers_state.vm_clock_lock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount.  */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /* vCPU is sleeping and warp can't be started.
               It is probably a race condition: notification sent
               to vCPU was processed in advance and vCPU went to sleep.
               Therefore we have to wake it up to do something. */
            if (replay_has_checkpoint()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            atomic_set_i64(&timers_state.qemu_icount_bias,
                           timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

static bool warp_timer_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_warp_timer != NULL;
}

static bool adjust_timers_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_rt_timer != NULL;
}

/*
 * Subsection for warp timer migration is optional, because it may not be
 * created.
 */
static const VMStateDescription icount_vmstate_warp_timer = {
    .name = "timer/icount/warp_timer",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = warp_timer_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(vm_clock_warp_start, TimersState),
        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription icount_vmstate_adjust_timers = {
    .name = "timer/icount/timers",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = adjust_timers_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_warp_timer,
        &icount_vmstate_adjust_timers,
        NULL
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_UNUSED(8),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    int64_t sleeptime_ns, endtime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage() / 100;
    throttle_ratio = pct / (1 - pct);
    /* Add 1ns to fix double's rounding error (like 0.9999999...) */
    sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
    endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
    while (sleeptime_ns > 0 && !cpu->stop) {
        if (sleeptime_ns > SCALE_MS) {
            qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
                                sleeptime_ns / SCALE_MS);
        } else {
            qemu_mutex_unlock_iothread();
            g_usleep(sleeptime_ns / SCALE_US);
            qemu_mutex_lock_iothread();
        }
        sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }
    atomic_set(&cpu->throttle_thread_scheduled, 0);
}

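/*
 * Worked example for the throttle math above: at 50% throttle,
 * throttle_ratio = 0.5 / 0.5 = 1, so each 10 ms timeslice of execution is
 * matched by roughly 10 ms of sleep; at 75%, the ratio is 3 and the vCPU
 * sleeps about 30 ms per 10 ms of run time, leaving it a quarter of real
 * time.
 */
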
static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage() / 100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
              CPU_THROTTLE_TIMESLICE_NS / (1 - pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                  CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    qemu_spin_init(&timers_state.vm_clock_lock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;

    if (!option && qemu_opt_get(opts, "align")) {
        error_setg(errp, "Please specify shift option when using align");
        return;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return;
    }

    if (strcmp(option, "auto") != 0) {
        if (qemu_strtol(option, NULL, 0, &time_shift) < 0
            || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
            error_setg(errp, "icount: Invalid shift value");
            return;
        }
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
        return;
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return;
    }

    icount_sleep = sleep;
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        timers_state.icount_time_shift = time_shift;
        use_icount = 1;
        return;
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    timers_state.icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

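/*
 * Usage sketch (example command lines, not taken from this file): these
 * options arrive via -icount, e.g. "-icount shift=7,sleep=on" for a fixed
 * 2^7 ns per instruction, or "-icount shift=auto" to start at shift 3 and
 * let icount_adjust() correct it as the guest runs.
 */
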
/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU.  If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

/* Kick the currently round-robin scheduled vCPU to next */
static void qemu_cpu_kick_rr_next_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

/* Kick all RR vCPUs */
static void qemu_cpu_kick_rr_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_exit(cpu);
    };
}

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (qemu_in_vcpu_thread()) {
        /* A CPU is currently running; kick it back out to the
         * tcg_cpu_exec() loop so it will recalculate its
         * icount deadline immediately.
         */
        qemu_cpu_kick(current_cpu);
    } else if (first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         * If we have no CPUs at all for some reason, we don't
         * need to do anything.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_next_cpu();
}

static void start_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
    }
    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
        timer_del(tcg_kick_vcpu_timer);
    }
}

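/*
 * With TCG_KICK_PERIOD == NANOSECONDS_PER_SECOND / 10, a round-robin vCPU
 * that never yields on its own is forcibly preempted every 100 ms of
 * virtual time, so one busy-looping guest CPU cannot starve the others.
 */
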
/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
        /* TODO: move to cpu_synchronize_state() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_state(cpu);
        }
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
        /* TODO: move to cpu_synchronize_post_reset() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_reset(cpu);
        }
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
        /* TODO: move to cpu_synchronize_post_init() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_init(cpu);
        }
    }
}

void cpu_synchronize_all_pre_loadvm(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_pre_loadvm(cpu);
    }
}

static int do_vm_stop(RunState state, bool send_stop)
{
    int ret = 0;

    if (runstate_is_running()) {
        runstate_set(state);
        cpu_disable_ticks();
        pause_all_vcpus();
        vm_state_notify(0, state);
        if (send_stop) {
            qapi_event_send_stop();
        }
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

/* Special vm_stop() variant for terminating the process.  Historically clients
 * did not expect a QMP STOP event and so we need to retain compatibility.
 */
int vm_shutdown(void)
{
    return do_vm_stop(RUN_STATE_SHUTDOWN, false);
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
    g_assert(qemu_cpu_is_self(cpu));
    cpu->stop = false;
    cpu->stopped = true;
    if (exit) {
        cpu_exit(cpu);
    }
    qemu_cond_broadcast(&qemu_pause_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        qemu_cpu_stop(cpu, false);
    }
    process_queued_cpu_work(cpu);
}

static void qemu_tcg_rr_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        stop_tcg_kick_timer();
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_wait_io_event(CPUState *cpu)
{
    bool slept = false;

    while (cpu_thread_is_idle(cpu)) {
        if (!slept) {
            slept = true;
            qemu_plugin_vcpu_idle_cb(cpu);
        }
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }
    if (slept) {
        qemu_plugin_vcpu_resume_cb(cpu);
    }

#ifdef _WIN32
    /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
    if (!tcg_enabled()) {
        SleepEx(0, TRUE);
    }
#endif
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        error_report("kvm_init_vcpu failed: %s", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

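/*
 * All of the vCPU thread functions below follow the same lifecycle shown
 * above for KVM: register with RCU, take the BQL, publish the thread id,
 * signal creation on qemu_cpu_cond, then loop between accelerator exec
 * and qemu_wait_io_event() until unplugged, and tear down in reverse.
 */
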
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    error_report("qtest is not supported under Windows");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug);

    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        /*
         * Include all the timers, because they may need attention.
         * Overly long CPU execution can add noticeable latency in the UI.
         */
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                              QEMU_TIMER_ATTR_ALL);
        /* Check realtime timers, because they help with input processing */
        deadline = qemu_soonest_timeout(deadline,
                qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
                                           QEMU_TIMER_ATTR_ALL));

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                      QEMU_TIMER_ATTR_ALL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu_neg(cpu)->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;

        replay_mutex_lock();
    }
}

static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu_neg(cpu)->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();

        replay_mutex_unlock();
    }
}

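/*
 * Budget split example for prepare_icount_for_run(): the decrementer only
 * holds 16 bits, so a budget of, say, 200000 instructions is handed out
 * as u16.low = 0xffff (65535) with icount_extra = 134465; the low counter
 * is refilled from icount_extra until the whole budget is consumed.
 */
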
static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

    assert(tcg_enabled());
#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
    atomic_set(&tcg_ctx->prof.cpu_exec_time,
               tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn. If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    assert(tcg_enabled());
    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        qemu_mutex_unlock_iothread();
        replay_mutex_lock();
        qemu_mutex_lock_iothread();
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        replay_mutex_unlock();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                qemu_mutex_unlock_iothread();
                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);
                qemu_mutex_lock_iothread();

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        if (use_icount && all_cpu_threads_idle()) {
            /*
             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
             * in the main_loop, wake it up in order to start the warp timer.
             */
            qemu_notify_event();
        }

        qemu_tcg_rr_wait_io_event();
        deal_with_unplugged_cpus();
    }

    rcu_unregister_thread();
    return NULL;
}

static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();
    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));
    rcu_unregister_thread();
    return NULL;
}

/* The HVF-specific vCPU thread function. This one should only run when the host
 * CPU supports the VMX "unrestricted guest" feature. */
static void *qemu_hvf_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    int r;

    assert(hvf_enabled());

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    hvf_init_vcpu(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = hvf_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    hvf_vcpu_destroy(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

static void *qemu_whpx_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = whpx_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = whpx_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
        qemu_wait_io_event_common(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    whpx_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif

/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread. The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    assert(tcg_enabled());
    g_assert(!use_icount);

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    /* process any pending work */
    cpu->exit_request = 1;

    do {
        if (cpu_can_run(cpu)) {
            int r;
            qemu_mutex_unlock_iothread();
            r = tcg_cpu_exec(cpu);
            qemu_mutex_lock_iothread();
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times. If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start-up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
            default:
                /* Ignore everything else? */
                break;
            }
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_tcg_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err && err != ESRCH) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        if (whpx_enabled()) {
            whpx_vcpu_kick(cpu);
        } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
                    __func__, GetLastError());
            exit(1);
        }
    }
#endif
}

c08d7424 1786void qemu_cpu_kick(CPUState *cpu)
296af7c9 1787{
f5c121b8 1788 qemu_cond_broadcast(cpu->halt_cond);
e0c38211 1789 if (tcg_enabled()) {
e8f22f76
AB
1790 if (qemu_tcg_mttcg_enabled()) {
1791 cpu_exit(cpu);
1792 } else {
1793 qemu_cpu_kick_rr_cpus();
1794 }
e0c38211 1795 } else {
b0cb0a66
VP
1796 if (hax_enabled()) {
1797 /*
1798 * FIXME: race condition with the exit_request check in
1799 * hax_vcpu_hax_exec
1800 */
1801 cpu->exit_request = 1;
1802 }
e0c38211
PB
1803 qemu_cpu_kick_thread(cpu);
1804 }
296af7c9
BS
1805}
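
/*
 * Illustrative sketch, not part of QEMU proper: a caller that wants
 * every vCPU to drop out of its execution loop, for example before
 * mutating global state, broadcasts the kick:
 *
 *     CPUState *cpu;
 *
 *     CPU_FOREACH(cpu) {
 *         qemu_cpu_kick(cpu);
 *     }
 *
 * This is the pattern pause_all_vcpus() below uses while it waits for
 * all vCPUs to report themselves stopped.
 */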

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

/*
 * The BQL is taken from so many places that it is worth profiling the
 * callers directly, instead of funneling them all through a single function.
 */
void qemu_mutex_lock_iothread_impl(const char *file, int line)
{
    QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);

    g_assert(!qemu_mutex_iothread_locked());
    bql_lock(&qemu_global_mutex, file, line);
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
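
/*
 * Illustrative sketch, not part of QEMU proper: code running outside
 * the BQL, such as a worker thread completing I/O, takes the lock
 * around any access to guest-visible state:
 *
 *     if (!qemu_mutex_iothread_locked()) {
 *         qemu_mutex_lock_iothread();
 *         ... raise an IRQ, touch device registers ...
 *         qemu_mutex_unlock_iothread();
 *     }
 *
 * qemu_mutex_lock_iothread() is a macro that expands to
 * qemu_mutex_lock_iothread_impl(__FILE__, __LINE__), so lock profiling
 * can attribute contention to the actual call site.
 */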

void qemu_cond_wait_iothread(QemuCond *cond)
{
    qemu_cond_wait(cond, &qemu_global_mutex);
}
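
/*
 * Illustrative sketch, not part of QEMU proper: a caller that already
 * holds the BQL can block on a condition variable without naming
 * qemu_global_mutex directly; "example_ready" and "example_cond" are
 * hypothetical:
 *
 *     while (!example_ready) {
 *         qemu_cond_wait_iothread(&example_cond);
 *     }
 *
 * Compare the open-coded qemu_cond_wait(&qemu_pause_cond,
 * &qemu_global_mutex) in pause_all_vcpus() below.
 */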

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        if (qemu_cpu_is_self(cpu)) {
            qemu_cpu_stop(cpu, true);
        } else {
            cpu->stop = true;
            qemu_cpu_kick(cpu);
        }
    }

    /* We need to drop the replay_lock so any vCPU threads woken up
     * can finish their replay tasks
     */
    replay_mutex_unlock();

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }

    qemu_mutex_unlock_iothread();
    replay_mutex_lock();
    qemu_mutex_lock_iothread();
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    if (!runstate_is_running()) {
        return;
    }

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}
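
/*
 * Illustrative sketch, not part of QEMU proper: pause_all_vcpus() and
 * resume_all_vcpus() bracket work that needs a quiescent guest, e.g.
 * a hypothetical helper that serializes guest state:
 *
 *     pause_all_vcpus();
 *     ... walk guest state with no vCPU running ...
 *     resume_all_vcpus();
 *
 * Both expect the BQL to be held; pause_all_vcpus() drops and retakes
 * it internally while reacquiring the replay lock.
 */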

void cpu_remove_sync(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
    qemu_mutex_unlock_iothread();
    qemu_thread_join(cpu->thread);
    qemu_mutex_lock_iothread();
}
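
/*
 * Illustrative sketch, not part of QEMU proper: a CPU hot-unplug path
 * would mark the vCPU for removal and then synchronously reap its
 * thread before releasing the CPU object (the unparenting step here
 * is a hypothetical simplification):
 *
 *     cpu_remove_sync(cpu);
 *     object_unparent(OBJECT(cpu));
 *
 * Note that the BQL is dropped across qemu_thread_join() so the
 * exiting vCPU thread can take it to finish its own tear-down.
 */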

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;
    static int tcg_region_inited;

    assert(tcg_enabled());
    /*
     * Initialize TCG regions--once. Now is a good time, because:
     * (1) TCG's init context, prologue and target globals have been set up.
     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
     *     -accel flag is processed, so the check doesn't work then).
     */
    if (!tcg_region_inited) {
        tcg_region_inited = 1;
        tcg_region_init();
    }

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                     cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    } else {
        /* For non-MTTCG cases we share the thread */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
        cpu->thread_id = first_cpu->thread_id;
        cpu->can_do_io = 1;
        cpu->created = true;
    }
}
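
/*
 * Illustrative note: with MTTCG each vCPU gets its own thread named
 * "CPU <n>/TCG" (e.g. "CPU 0/TCG", "CPU 1/TCG"), while round-robin
 * TCG uses one shared thread named "ALL CPUs/TCG". Only the first
 * qemu_tcg_init_vcpu() call creates that shared thread; later vCPUs
 * reuse its QemuThread and halt condition, as the else branch above
 * shows.
 */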

static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
}

static void qemu_hvf_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    /* HVF currently does not support TCG, and only runs in
     * unrestricted-guest mode. */
    assert(hvf_enabled());

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
}

static void qemu_whpx_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
}
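
/*
 * Illustrative sketch, not part of QEMU proper: the accelerator start
 * functions above all share one shape, differing only in thread name
 * and thread function. A hypothetical generic helper would look like:
 *
 *     static void example_start_vcpu(CPUState *cpu, const char *accel,
 *                                    void *(*fn)(void *))
 *     {
 *         char name[VCPU_THREAD_NAME_SIZE];
 *
 *         cpu->thread = g_malloc0(sizeof(QemuThread));
 *         cpu->halt_cond = g_malloc0(sizeof(QemuCond));
 *         qemu_cond_init(cpu->halt_cond);
 *         snprintf(name, VCPU_THREAD_NAME_SIZE, "CPU %d/%s",
 *                  cpu->cpu_index, accel);
 *         qemu_thread_create(cpu->thread, name, fn, cpu,
 *                            QEMU_THREAD_JOINABLE);
 *     }
 *
 * example_start_vcpu() is made up; the Windows-only cpu->hThread
 * handling in the HAX and WHPX variants is omitted here.
 */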

void qemu_init_vcpu(CPUState *cpu)
{
    MachineState *ms = MACHINE(qdev_get_machine());

    cpu->nr_cores = ms->smp.cores;
    cpu->nr_threads = ms->smp.threads;
    cpu->stopped = true;
    cpu->random_seed = qemu_guest_random_seed_thread_part1();

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (hvf_enabled()) {
        qemu_hvf_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else if (whpx_enabled()) {
        qemu_whpx_start_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }

    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = true;
        cpu_exit(current_cpu);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state, true);
}

/**
 * Prepare for (re)starting the VM.
 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
 * running or in case of an error condition), 0 otherwise.
 */
int vm_prepare_start(void)
{
    RunState requested;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending. The BLOCK_IO_ERROR event, for
     * example, is documented to always be followed by the STOP
     * event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop();
        qapi_event_send_resume();
        return -1;
    }

    /* We are sending this now, but the CPUs will be resumed shortly later */
    qapi_event_send_resume();

    cpu_enable_ticks();
    runstate_set(RUN_STATE_RUNNING);
    vm_state_notify(1, RUN_STATE_RUNNING);
    return 0;
}

void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}
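
/*
 * Illustrative sketch, not part of QEMU proper: vm_stop() and
 * vm_start() bracket management operations, e.g. pausing the guest
 * around some host-side work:
 *
 *     if (vm_stop(RUN_STATE_PAUSED) == 0) {
 *         ... guest is stopped, block layer drained and flushed ...
 *         vm_start();
 *     }
 *
 * RUN_STATE_PAUSED is a real RunState value; the surrounding helper
 * is hypothetical.
 */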

/* Does a state transition even if the VM is already stopped; the
 * current state is forgotten forever.
 */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

void list_cpus(const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list();
#endif
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
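
/*
 * This function is the handler behind the QMP "memsave" command, so a
 * management client drives it with something like the following
 * (sketch based on the usual QMP argument names, where "val" is the
 * starting virtual address):
 *
 *     { "execute": "memsave",
 *       "arguments": { "val": 4096, "size": 100,
 *                      "filename": "/tmp/virtual-mem-dump" } }
 *
 * "cpu-index" is optional and defaults to CPU 0, matching the has_cpu
 * handling above.
 */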

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(void)
{
    if (!use_icount) {
        return;
    }

    qemu_printf("Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount()) / SCALE_MS);
    if (icount_align_option) {
        qemu_printf("Max guest delay %"PRIi64" ms\n",
                    -max_delay / SCALE_MS);
        qemu_printf("Max guest advance %"PRIi64" ms\n",
                    max_advance / SCALE_MS);
    } else {
        qemu_printf("Max guest delay NA\n");
        qemu_printf("Max guest advance NA\n");
    }
}