/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "migration/vmstate.h"
#include "monitor/monitor.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qapi-events-run-state.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"
#include "sysemu/tcg.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "sysemu/hvf.h"
#include "sysemu/whpx.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qemu/guest-random.h"
#include "tcg.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/boards.h"
#include "hw/hw.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* Protect fields that can be respectively read outside the
     * BQL, and written from multiple threads.
     */
    QemuSeqLock vm_clock_seqlock;
    QemuSpin vm_clock_lock;

    int16_t cpu_ticks_enabled;

    /* Conversion factor from emulated instructions to virtual clock ticks. */
    int16_t icount_time_shift;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;

    int64_t vm_clock_warp_start;
    int64_t cpu_clock_offset;

    /* Only written by TCG thread */
    int64_t qemu_icount;

    /* for adjusting icount */
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    QEMUTimer *icount_warp_timer;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *   - atomic instructions
 *   - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}

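/*
 * Illustrative sketch, not part of the original file: the check above is a
 * bitmask subset test.  If, hypothetically, TCG_GUEST_DEFAULT_MO requested
 * TCG_MO_LD_LD | TCG_MO_ST_ST while the host backend's TCG_TARGET_DEFAULT_MO
 * only provided TCG_MO_ST_ST, then (guest & ~host) would be non-zero and
 * MTTCG would be refused; the guest's ordering requirements must be a subset
 * of what the host TCG backend guarantees.
 */
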
static bool default_mttcg_enabled(void)
{
    if (use_icount || TCG_OVERSIZED_GUEST) {
        return false;
    } else {
#ifdef TARGET_SUPPORTS_MTTCG
        return check_tcg_memory_orders_compatible();
#else
        return false;
#endif
    }
}

void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > hosts");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                warn_report("Guest not yet converted to MTTCG - "
                            "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    warn_report("Guest expects a stronger memory ordering "
                                "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}

/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return (cpu->icount_budget -
            (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
static void cpu_update_icount_locked(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

    atomic_set_i64(&timers_state.qemu_icount,
                   timers_state.qemu_icount + executed);
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cpu_update_icount_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static int64_t cpu_get_icount_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return atomic_read_i64(&timers_state.qemu_icount);
}

static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw_locked();
    return atomic_read_i64(&timers_state.qemu_icount_bias) +
           cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << atomic_read(&timers_state.icount_time_shift);
}

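/*
 * Worked example (illustrative, not part of the original source): with
 * icount_time_shift == 3 each emulated instruction accounts for
 * 1 << 3 = 8 ns of virtual time, i.e. roughly 125 MIPS, which matches the
 * initial guess used by configure_icount() below.
 */
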
static int64_t cpu_get_ticks_locked(void)
{
    int64_t ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Non increasing ticks may happen if the host uses software suspend. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    qemu_spin_lock(&timers_state.vm_clock_lock);
    ticks = cpu_get_ticks_locked();
    qemu_spin_unlock(&timers_state.vm_clock_lock);
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift + 1);
    }
    last_delta = delta;
    atomic_set_i64(&timers_state.qemu_icount_bias,
                   cur_icount - (timers_state.qemu_icount
                                 << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    int shift = atomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock,
                           &timers_state.vm_clock_lock);
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp);
        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                             &timers_state.vm_clock_lock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount.  */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /* vCPU is sleeping and warp can't be started.
               It is probably a race condition: notification sent
               to vCPU was processed in advance and vCPU went to sleep.
               Therefore we have to wake it up for doing something. */
            if (replay_has_checkpoint()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            atomic_set_i64(&timers_state.qemu_icount_bias,
                           timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

static bool warp_timer_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_warp_timer != NULL;
}

static bool adjust_timers_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_rt_timer != NULL;
}

/*
 * Subsection for warp timer migration is optional, because may not be created
 */
static const VMStateDescription icount_vmstate_warp_timer = {
    .name = "timer/icount/warp_timer",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = warp_timer_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(vm_clock_warp_start, TimersState),
        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription icount_vmstate_adjust_timers = {
    .name = "timer/icount/timers",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = adjust_timers_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_warp_timer,
        &icount_vmstate_adjust_timers,
        NULL
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_UNUSED(8),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
}

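/*
 * Worked example (illustrative, not part of the original source): at a 75%
 * throttle, pct = 0.75 and throttle_ratio = 0.75 / 0.25 = 3, so the vCPU
 * sleeps for 3 * CPU_THROTTLE_TIMESLICE_NS = 30 ms per 10 ms timeslice,
 * i.e. it runs for roughly a quarter of wall-clock time.
 */
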
static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    qemu_spin_init(&timers_state.vm_clock_lock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        timers_state.icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    timers_state.icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

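/*
 * Usage sketch (illustrative, not part of the original source): the options
 * parsed above correspond to the -icount command line, e.g.
 * "-icount shift=7,align=off,sleep=on" for a fixed 1 << 7 = 128 ns per
 * instruction, or "-icount shift=auto" to let icount_adjust() tune the
 * shift at run time.
 */
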
/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (qemu_in_vcpu_thread()) {
        /* A CPU is currently running; kick it back out to the
         * tcg_cpu_exec() loop so it will recalculate its
         * icount deadline immediately.
         */
        qemu_cpu_kick(current_cpu);
    } else if (first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         * If we have no CPUs at all for some reason, we don't
         * need to do anything.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}

static void start_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
    }
    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
        timer_del(tcg_kick_vcpu_timer);
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
        /* TODO: move to cpu_synchronize_state() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_state(cpu);
        }
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
        /* TODO: move to cpu_synchronize_post_reset() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_reset(cpu);
        }
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
        /* TODO: move to cpu_synchronize_post_init() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_init(cpu);
        }
    }
}

void cpu_synchronize_all_pre_loadvm(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_pre_loadvm(cpu);
    }
}

static int do_vm_stop(RunState state, bool send_stop)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        if (send_stop) {
            qapi_event_send_stop();
        }
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

/* Special vm_stop() variant for terminating the process.  Historically clients
 * did not expect a QMP STOP event and so we need to retain compatibility.
 */
int vm_shutdown(void)
{
    return do_vm_stop(RUN_STATE_SHUTDOWN, false);
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
    g_assert(qemu_cpu_is_self(cpu));
    cpu->stop = false;
    cpu->stopped = true;
    if (exit) {
        cpu_exit(cpu);
    }
    qemu_cond_broadcast(&qemu_pause_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        qemu_cpu_stop(cpu, false);
    }
    process_queued_cpu_work(cpu);
}

static void qemu_tcg_rr_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        stop_tcg_kick_timer();
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

#ifdef _WIN32
    /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
    if (!tcg_enabled()) {
        SleepEx(0, TRUE);
    }
#endif
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        error_report("kvm_init_vcpu failed: %s", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    error_report("qtest is not supported under Windows");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug);

    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu_neg(cpu)->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;

        replay_mutex_lock();
    }
}

static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu_neg(cpu)->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();

        replay_mutex_unlock();
    }
}


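/*
 * Worked example (illustrative, not part of the original source): if
 * tcg_get_icount_limit() returns a budget of 100000 instructions,
 * prepare_icount_for_run() stores MIN(0xffff, 100000) = 65535 in
 * icount_decr.u16.low and the remaining 34465 in cpu->icount_extra;
 * process_icount_data() later folds whatever was actually executed back
 * into timers_state.qemu_icount.
 */
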
static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

    assert(tcg_enabled());
#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
    atomic_set(&tcg_ctx->prof.cpu_exec_time,
               tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn. If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    assert(tcg_enabled());
    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        qemu_mutex_unlock_iothread();
        replay_mutex_lock();
        qemu_mutex_lock_iothread();
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        replay_mutex_unlock();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                qemu_mutex_unlock_iothread();
                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);
                qemu_mutex_lock_iothread();

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        if (use_icount && all_cpu_threads_idle()) {
            /*
             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
             * in the main_loop, wake it up in order to start the warp timer.
             */
            qemu_notify_event();
        }

        qemu_tcg_rr_wait_io_event();
        deal_with_unplugged_cpus();
    }

    rcu_unregister_thread();
    return NULL;
}

static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();
    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));
    rcu_unregister_thread();
    return NULL;
}

/* The HVF-specific vCPU thread function. This one should only run when the host
 * CPU supports the VMX "unrestricted guest" feature. */
static void *qemu_hvf_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    int r;

    assert(hvf_enabled());

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    hvf_init_vcpu(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = hvf_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    hvf_vcpu_destroy(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

static void *qemu_whpx_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = whpx_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = whpx_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
        qemu_wait_io_event_common(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    whpx_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif

/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread. The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    assert(tcg_enabled());
    g_assert(!use_icount);

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    /* process any pending work */
    cpu->exit_request = 1;

    do {
        if (cpu_can_run(cpu)) {
            int r;
            qemu_mutex_unlock_iothread();
            r = tcg_cpu_exec(cpu);
            qemu_mutex_lock_iothread();
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times. If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start-up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
            default:
                /* Ignore everything else? */
                break;
            }
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_tcg_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

2ff09a40 1784static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1785{
1786#ifndef _WIN32
1787 int err;
1788
e0c38211
PB
1789 if (cpu->thread_kicked) {
1790 return;
9102deda 1791 }
e0c38211 1792 cpu->thread_kicked = true;
814e612e 1793 err = pthread_kill(cpu->thread->thread, SIG_IPI);
d455ebc4 1794 if (err && err != ESRCH) {
cc015e9a
PB
 1795 fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
1796 exit(1);
1797 }
1798#else /* _WIN32 */
b0cb0a66 1799 if (!qemu_cpu_is_self(cpu)) {
19306806
JTV
1800 if (whpx_enabled()) {
1801 whpx_vcpu_kick(cpu);
1802 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
b0cb0a66
VP
1803 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1804 __func__, GetLastError());
1805 exit(1);
1806 }
1807 }
e0c38211
PB
1808#endif
1809}
ed9164a3 1810
c08d7424 1811void qemu_cpu_kick(CPUState *cpu)
296af7c9 1812{
f5c121b8 1813 qemu_cond_broadcast(cpu->halt_cond);
e0c38211 1814 if (tcg_enabled()) {
791158d9 1815 cpu_exit(cpu);
37257942 1816 /* NOP unless doing single-thread RR */
791158d9 1817 qemu_cpu_kick_rr_cpu();
e0c38211 1818 } else {
b0cb0a66
VP
1819 if (hax_enabled()) {
1820 /*
1821 * FIXME: race condition with the exit_request check in
1822 * hax_vcpu_hax_exec
1823 */
1824 cpu->exit_request = 1;
1825 }
e0c38211
PB
1826 qemu_cpu_kick_thread(cpu);
1827 }
296af7c9
BS
1828}
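
/*
 * Illustrative sketch, not part of cpus.c: a caller that has updated some
 * per-CPU state under the BQL and wants every vCPU to notice it can simply
 * kick them all.  Roughly speaking, the kick interrupts the TCG execution
 * loop or the accelerator's blocking run call (KVM/HAX/WHPX).  The wrapper
 * below is hypothetical; only CPU_FOREACH() and qemu_cpu_kick() are assumed.
 */
static void example_kick_all_vcpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        qemu_cpu_kick(cpu);
    }
}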
1829
46d62fac 1830void qemu_cpu_kick_self(void)
296af7c9 1831{
4917cf44 1832 assert(current_cpu);
9102deda 1833 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1834}
1835
60e82579 1836bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1837{
814e612e 1838 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1839}
1840
79e2b9ae 1841bool qemu_in_vcpu_thread(void)
aa723c23 1842{
4917cf44 1843 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1844}
1845
afbe7053
PB
1846static __thread bool iothread_locked = false;
1847
1848bool qemu_mutex_iothread_locked(void)
1849{
1850 return iothread_locked;
1851}
1852
cb764d06
EC
1853/*
1854 * The BQL is taken from so many places that it is worth profiling the
1855 * callers directly, instead of funneling them all through a single function.
1856 */
1857void qemu_mutex_lock_iothread_impl(const char *file, int line)
296af7c9 1858{
cb764d06
EC
1859 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1860
8d04fb55 1861 g_assert(!qemu_mutex_iothread_locked());
cb764d06 1862 bql_lock(&qemu_global_mutex, file, line);
afbe7053 1863 iothread_locked = true;
296af7c9
BS
1864}
1865
1866void qemu_mutex_unlock_iothread(void)
1867{
8d04fb55 1868 g_assert(qemu_mutex_iothread_locked());
afbe7053 1869 iothread_locked = false;
296af7c9
BS
1870 qemu_mutex_unlock(&qemu_global_mutex);
1871}
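
/*
 * Illustrative sketch, not part of cpus.c: the usual BQL discipline is to
 * assert the expected lock state and to drop the lock around long-running
 * work, mirroring what qemu_tcg_cpu_thread_fn() does around tcg_cpu_exec().
 * The helper and its callback are hypothetical stand-ins.
 */
static void example_run_outside_bql(void (*slow_work)(void *), void *opaque)
{
    g_assert(qemu_mutex_iothread_locked());

    qemu_mutex_unlock_iothread();
    slow_work(opaque);              /* anything that must not hold the BQL */
    qemu_mutex_lock_iothread();
}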
1872
e8faee06 1873static bool all_vcpus_paused(void)
296af7c9 1874{
bdc44640 1875 CPUState *cpu;
296af7c9 1876
bdc44640 1877 CPU_FOREACH(cpu) {
182735ef 1878 if (!cpu->stopped) {
e8faee06 1879 return false;
0ab07c62 1880 }
296af7c9
BS
1881 }
1882
e8faee06 1883 return true;
296af7c9
BS
1884}
1885
1886void pause_all_vcpus(void)
1887{
bdc44640 1888 CPUState *cpu;
296af7c9 1889
40daca54 1890 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1891 CPU_FOREACH(cpu) {
ebd05fea
DH
1892 if (qemu_cpu_is_self(cpu)) {
1893 qemu_cpu_stop(cpu, true);
1894 } else {
1895 cpu->stop = true;
1896 qemu_cpu_kick(cpu);
1897 }
d798e974
JK
1898 }
1899
d759c951
AB
1900 /* We need to drop the replay_lock so any vCPU threads woken up
1901 * can finish their replay tasks
1902 */
1903 replay_mutex_unlock();
1904
296af7c9 1905 while (!all_vcpus_paused()) {
be7d6c57 1906 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1907 CPU_FOREACH(cpu) {
182735ef 1908 qemu_cpu_kick(cpu);
296af7c9
BS
1909 }
1910 }
d759c951
AB
1911
1912 qemu_mutex_unlock_iothread();
1913 replay_mutex_lock();
1914 qemu_mutex_lock_iothread();
296af7c9
BS
1915}
1916
2993683b
IM
1917void cpu_resume(CPUState *cpu)
1918{
1919 cpu->stop = false;
1920 cpu->stopped = false;
1921 qemu_cpu_kick(cpu);
1922}
1923
296af7c9
BS
1924void resume_all_vcpus(void)
1925{
bdc44640 1926 CPUState *cpu;
296af7c9 1927
40daca54 1928 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1929 CPU_FOREACH(cpu) {
182735ef 1930 cpu_resume(cpu);
296af7c9
BS
1931 }
1932}
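
/*
 * Illustrative sketch, not part of cpus.c: callers that need the guest
 * quiesced (for example to capture consistent device or RAM state) bracket
 * the work with pause_all_vcpus()/resume_all_vcpus() while holding the BQL
 * and outside any vCPU thread.  The callback is a hypothetical stand-in.
 */
static void example_with_vcpus_paused(void (*work)(void *), void *opaque)
{
    pause_all_vcpus();      /* returns once every vCPU reports stopped */
    work(opaque);
    resume_all_vcpus();     /* re-enables QEMU_CLOCK_VIRTUAL, restarts vCPUs */
}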
1933
dbadee4f 1934void cpu_remove_sync(CPUState *cpu)
4c055ab5
GZ
1935{
1936 cpu->stop = true;
1937 cpu->unplug = true;
1938 qemu_cpu_kick(cpu);
dbadee4f
PB
1939 qemu_mutex_unlock_iothread();
1940 qemu_thread_join(cpu->thread);
1941 qemu_mutex_lock_iothread();
2c579042
BR
1942}
1943
4900116e
DDAG
 1944/* For temporary buffers used to form a thread name */
1945#define VCPU_THREAD_NAME_SIZE 16
1946
e5ab30a2 1947static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1948{
4900116e 1949 char thread_name[VCPU_THREAD_NAME_SIZE];
37257942
AB
1950 static QemuCond *single_tcg_halt_cond;
1951 static QemuThread *single_tcg_cpu_thread;
e8feb96f
EC
1952 static int tcg_region_inited;
1953
f28d0dfd 1954 assert(tcg_enabled());
e8feb96f
EC
1955 /*
1956 * Initialize TCG regions--once. Now is a good time, because:
1957 * (1) TCG's init context, prologue and target globals have been set up.
1958 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1959 * -accel flag is processed, so the check doesn't work then).
1960 */
1961 if (!tcg_region_inited) {
1962 tcg_region_inited = 1;
1963 tcg_region_init();
1964 }
4900116e 1965
37257942 1966 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
814e612e 1967 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1968 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1969 qemu_cond_init(cpu->halt_cond);
37257942
AB
1970
1971 if (qemu_tcg_mttcg_enabled()) {
1972 /* create a thread per vCPU with TCG (MTTCG) */
1973 parallel_cpus = true;
1974 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
4900116e 1975 cpu->cpu_index);
37257942
AB
1976
1977 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1978 cpu, QEMU_THREAD_JOINABLE);
1979
1980 } else {
1981 /* share a single thread for all cpus with TCG */
1982 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1983 qemu_thread_create(cpu->thread, thread_name,
1984 qemu_tcg_rr_cpu_thread_fn,
1985 cpu, QEMU_THREAD_JOINABLE);
1986
1987 single_tcg_halt_cond = cpu->halt_cond;
1988 single_tcg_cpu_thread = cpu->thread;
1989 }
1ecf47bf 1990#ifdef _WIN32
814e612e 1991 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1992#endif
296af7c9 1993 } else {
37257942
AB
1994 /* For non-MTTCG cases we share the thread */
1995 cpu->thread = single_tcg_cpu_thread;
1996 cpu->halt_cond = single_tcg_halt_cond;
a342173a
DH
1997 cpu->thread_id = first_cpu->thread_id;
1998 cpu->can_do_io = 1;
1999 cpu->created = true;
296af7c9
BS
2000 }
2001}
2002
b0cb0a66
VP
2003static void qemu_hax_start_vcpu(CPUState *cpu)
2004{
2005 char thread_name[VCPU_THREAD_NAME_SIZE];
2006
2007 cpu->thread = g_malloc0(sizeof(QemuThread));
2008 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2009 qemu_cond_init(cpu->halt_cond);
2010
2011 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
2012 cpu->cpu_index);
2013 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
2014 cpu, QEMU_THREAD_JOINABLE);
2015#ifdef _WIN32
2016 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2017#endif
b0cb0a66
VP
2018}
2019
48a106bd 2020static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 2021{
4900116e
DDAG
2022 char thread_name[VCPU_THREAD_NAME_SIZE];
2023
814e612e 2024 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
2025 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2026 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
2027 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2028 cpu->cpu_index);
2029 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2030 cpu, QEMU_THREAD_JOINABLE);
296af7c9
BS
2031}
2032
c97d6d2c
SAGDR
2033static void qemu_hvf_start_vcpu(CPUState *cpu)
2034{
2035 char thread_name[VCPU_THREAD_NAME_SIZE];
2036
2037 /* HVF currently does not support TCG, and only runs in
2038 * unrestricted-guest mode. */
2039 assert(hvf_enabled());
2040
2041 cpu->thread = g_malloc0(sizeof(QemuThread));
2042 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2043 qemu_cond_init(cpu->halt_cond);
2044
2045 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2046 cpu->cpu_index);
2047 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2048 cpu, QEMU_THREAD_JOINABLE);
c97d6d2c
SAGDR
2049}
2050
19306806
JTV
2051static void qemu_whpx_start_vcpu(CPUState *cpu)
2052{
2053 char thread_name[VCPU_THREAD_NAME_SIZE];
2054
2055 cpu->thread = g_malloc0(sizeof(QemuThread));
2056 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2057 qemu_cond_init(cpu->halt_cond);
2058 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2059 cpu->cpu_index);
2060 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2061 cpu, QEMU_THREAD_JOINABLE);
2062#ifdef _WIN32
2063 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2064#endif
19306806
JTV
2065}
2066
10a9021d 2067static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 2068{
4900116e
DDAG
2069 char thread_name[VCPU_THREAD_NAME_SIZE];
2070
814e612e 2071 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
2072 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2073 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
2074 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2075 cpu->cpu_index);
2076 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 2077 QEMU_THREAD_JOINABLE);
c7f0f3b1
AL
2078}
2079
c643bed9 2080void qemu_init_vcpu(CPUState *cpu)
296af7c9 2081{
5cc8767d
LX
2082 MachineState *ms = MACHINE(qdev_get_machine());
2083
2084 cpu->nr_cores = ms->smp.cores;
2085 cpu->nr_threads = ms->smp.threads;
f324e766 2086 cpu->stopped = true;
9c09a251 2087 cpu->random_seed = qemu_guest_random_seed_thread_part1();
56943e8c
PM
2088
2089 if (!cpu->as) {
2090 /* If the target cpu hasn't set up any address spaces itself,
2091 * give it the default one.
2092 */
12ebc9a7 2093 cpu->num_ases = 1;
80ceb07a 2094 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
56943e8c
PM
2095 }
2096
0ab07c62 2097 if (kvm_enabled()) {
48a106bd 2098 qemu_kvm_start_vcpu(cpu);
b0cb0a66
VP
2099 } else if (hax_enabled()) {
2100 qemu_hax_start_vcpu(cpu);
c97d6d2c
SAGDR
2101 } else if (hvf_enabled()) {
2102 qemu_hvf_start_vcpu(cpu);
c7f0f3b1 2103 } else if (tcg_enabled()) {
e5ab30a2 2104 qemu_tcg_init_vcpu(cpu);
19306806
JTV
2105 } else if (whpx_enabled()) {
2106 qemu_whpx_start_vcpu(cpu);
c7f0f3b1 2107 } else {
10a9021d 2108 qemu_dummy_start_vcpu(cpu);
0ab07c62 2109 }
81e96311
DH
2110
2111 while (!cpu->created) {
2112 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2113 }
296af7c9
BS
2114}
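
/*
 * Illustrative, self-contained sketch (plain pthreads, not QEMU code) of the
 * creation handshake used above: the parent starts the thread and then waits
 * on a condition variable until the child announces itself, just as
 * qemu_init_vcpu() waits for cpu->created under the BQL.
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t example_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t example_created_cond = PTHREAD_COND_INITIALIZER;
static bool example_created;

static void *example_child_fn(void *arg)
{
    pthread_mutex_lock(&example_lock);
    example_created = true;                       /* like cpu->created = true */
    pthread_cond_signal(&example_created_cond);   /* like qemu_cond_signal() */
    pthread_mutex_unlock(&example_lock);
    /* ... the thread's main loop would run here ... */
    return NULL;
}

static void example_start_and_wait(void)
{
    pthread_t tid;

    pthread_mutex_lock(&example_lock);
    pthread_create(&tid, NULL, example_child_fn, NULL);
    while (!example_created) {                    /* like !cpu->created above */
        pthread_cond_wait(&example_created_cond, &example_lock);
    }
    pthread_mutex_unlock(&example_lock);
}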
2115
b4a3d965 2116void cpu_stop_current(void)
296af7c9 2117{
4917cf44 2118 if (current_cpu) {
0ec7e677
PM
2119 current_cpu->stop = true;
2120 cpu_exit(current_cpu);
b4a3d965 2121 }
296af7c9
BS
2122}
2123
56983463 2124int vm_stop(RunState state)
296af7c9 2125{
aa723c23 2126 if (qemu_in_vcpu_thread()) {
74892d24 2127 qemu_system_vmstop_request_prepare();
1dfb4dd9 2128 qemu_system_vmstop_request(state);
296af7c9
BS
2129 /*
2130 * FIXME: should not return to device code in case
2131 * vm_stop() has been requested.
2132 */
b4a3d965 2133 cpu_stop_current();
56983463 2134 return 0;
296af7c9 2135 }
56983463 2136
4486e89c 2137 return do_vm_stop(state, true);
296af7c9
BS
2138}
2139
2d76e823
CI
2140/**
2141 * Prepare for (re)starting the VM.
2142 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2143 * running or in case of an error condition), 0 otherwise.
2144 */
2145int vm_prepare_start(void)
2146{
2147 RunState requested;
2d76e823
CI
2148
2149 qemu_vmstop_requested(&requested);
2150 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2151 return -1;
2152 }
2153
 2154 /* Ensure that a STOP/RESUME pair of events is emitted if a
 2155 * vmstop request was pending. The BLOCK_IO_ERROR event, for
 2156 * example, is documented to always be followed by the STOP
 2157 * event.
2158 */
2159 if (runstate_is_running()) {
3ab72385
PX
2160 qapi_event_send_stop();
2161 qapi_event_send_resume();
f056158d 2162 return -1;
2d76e823
CI
2163 }
2164
 2165 /* We are sending this now, but the CPUs will be resumed shortly afterwards */
3ab72385 2166 qapi_event_send_resume();
f056158d
MA
2167
2168 replay_enable_events();
2169 cpu_enable_ticks();
2170 runstate_set(RUN_STATE_RUNNING);
2171 vm_state_notify(1, RUN_STATE_RUNNING);
2172 return 0;
2d76e823
CI
2173}
2174
2175void vm_start(void)
2176{
2177 if (!vm_prepare_start()) {
2178 resume_all_vcpus();
2179 }
2180}
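
/*
 * Illustrative sketch, not part of cpus.c: roughly how a monitor
 * "stop"/"cont" pair drives the functions above when called from the main
 * thread.  The wrapper is hypothetical; vm_stop(), vm_start() and
 * RUN_STATE_PAUSED are the real interfaces.
 */
static void example_pause_and_resume_vm(void)
{
    vm_stop(RUN_STATE_PAUSED);   /* stops vCPUs and the clock, emits STOP */
    /* ... inspect or modify guest state while it is quiescent ... */
    vm_start();                  /* emits RESUME and restarts the vCPUs */
}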
2181
8a9236f1
LC
 2182/* Does a state transition even if the VM is already stopped;
 2183 the current state is forgotten forever. */
56983463 2184int vm_stop_force_state(RunState state)
8a9236f1
LC
2185{
2186 if (runstate_is_running()) {
56983463 2187 return vm_stop(state);
8a9236f1
LC
2188 } else {
2189 runstate_set(state);
b2780d32
WC
2190
2191 bdrv_drain_all();
594a45ce
KW
2192 /* Make sure to return an error if the flush in a previous vm_stop()
2193 * failed. */
22af08ea 2194 return bdrv_flush_all();
8a9236f1
LC
2195 }
2196}
2197
0442428a 2198void list_cpus(const char *optarg)
262353cb
BS
2199{
 2200 /* XXX: implement xxx_cpu_list for targets that still lack it */
e916cbf8 2201#if defined(cpu_list)
0442428a 2202 cpu_list();
262353cb
BS
2203#endif
2204}
de0b36b6 2205
0cfd6a9a
LC
2206void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2207 bool has_cpu, int64_t cpu_index, Error **errp)
2208{
2209 FILE *f;
2210 uint32_t l;
55e5c285 2211 CPUState *cpu;
0cfd6a9a 2212 uint8_t buf[1024];
0dc9daf0 2213 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
2214
2215 if (!has_cpu) {
2216 cpu_index = 0;
2217 }
2218
151d1322
AF
2219 cpu = qemu_get_cpu(cpu_index);
2220 if (cpu == NULL) {
c6bd8c70
MA
2221 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2222 "a CPU number");
0cfd6a9a
LC
2223 return;
2224 }
2225
2226 f = fopen(filename, "wb");
2227 if (!f) {
618da851 2228 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
2229 return;
2230 }
2231
2232 while (size != 0) {
2233 l = sizeof(buf);
2234 if (l > size)
2235 l = size;
2f4d0f59 2236 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
2237 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2238 " specified", orig_addr, orig_size);
2f4d0f59
AK
2239 goto exit;
2240 }
0cfd6a9a 2241 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 2242 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
2243 goto exit;
2244 }
2245 addr += l;
2246 size -= l;
2247 }
2248
2249exit:
2250 fclose(f);
2251}
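
/*
 * Illustrative sketch, not part of cpus.c: calling the QMP handler above
 * directly from C, matching its signature.  The guest address, size and
 * file path are made up for the example.
 */
static void example_memsave_one_page(void)
{
    Error *err = NULL;

    qmp_memsave(0x0, 4096, "/tmp/guest-page.bin", true, 0, &err);
    if (err) {
        error_report_err(err);
    }
}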
6d3962bf
LC
2252
2253void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2254 Error **errp)
2255{
2256 FILE *f;
2257 uint32_t l;
2258 uint8_t buf[1024];
2259
2260 f = fopen(filename, "wb");
2261 if (!f) {
618da851 2262 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
2263 return;
2264 }
2265
2266 while (size != 0) {
2267 l = sizeof(buf);
2268 if (l > size)
2269 l = size;
eb6282f2 2270 cpu_physical_memory_read(addr, buf, l);
6d3962bf 2271 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 2272 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
2273 goto exit;
2274 }
2275 addr += l;
2276 size -= l;
2277 }
2278
2279exit:
2280 fclose(f);
2281}
ab49ab5c
LC
2282
2283void qmp_inject_nmi(Error **errp)
2284{
9cb805fd 2285 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c 2286}
27498bef 2287
76c86615 2288void dump_drift_info(void)
27498bef
ST
2289{
2290 if (!use_icount) {
2291 return;
2292 }
2293
76c86615 2294 qemu_printf("Host - Guest clock %"PRIi64" ms\n",
27498bef
ST
2295 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2296 if (icount_align_option) {
76c86615
MA
2297 qemu_printf("Max guest delay %"PRIi64" ms\n",
2298 -max_delay / SCALE_MS);
2299 qemu_printf("Max guest advance %"PRIi64" ms\n",
2300 max_advance / SCALE_MS);
27498bef 2301 } else {
76c86615
MA
2302 qemu_printf("Max guest delay NA\n");
2303 qemu_printf("Max guest advance NA\n");
27498bef
ST
2304 }
2305}