/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "monitor/monitor.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qapi-events-run-state.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"
#include "sysemu/tcg.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "sysemu/hvf.h"
#include "sysemu/whpx.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qemu/guest-random.h"
#include "tcg.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "hw/boards.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000
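
/*
 * Throttling works by making each vCPU sleep after every
 * CPU_THROTTLE_TIMESLICE_NS of execution.  The sleep time is
 * throttle_ratio = pct / (1 - pct) times the timeslice, so a 50%
 * throttle sleeps 10 ms for every 10 ms of run time, while a 99%
 * throttle sleeps 990 ms per 10 ms of run time (see
 * cpu_throttle_thread() below).
 */
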
bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* Protect fields that can be respectively read outside the
     * BQL, and written from multiple threads.
     */
    QemuSeqLock vm_clock_seqlock;
    QemuSpin vm_clock_lock;

    int16_t cpu_ticks_enabled;

    /* Conversion factor from emulated instructions to virtual clock ticks. */
    int16_t icount_time_shift;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;

    int64_t vm_clock_warp_start;
    int64_t cpu_clock_offset;

    /* Only written by TCG thread */
    int64_t qemu_icount;

    /* for adjusting icount */
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    QEMUTimer *icount_warp_timer;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *  - atomic instructions
 *  - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 *  - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 *  - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}
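
/*
 * For example, a strongly ordered (TSO) guest such as x86 advertises
 * ordering bits in TCG_GUEST_DEFAULT_MO that a weakly ordered host
 * backend such as Arm does not provide in TCG_TARGET_DEFAULT_MO, so
 * the check above fails and MTTCG stays off by default for that
 * combination.
 */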

static bool default_mttcg_enabled(void)
{
    if (use_icount || TCG_OVERSIZED_GUEST) {
        return false;
    } else {
#ifdef TARGET_SUPPORTS_MTTCG
        return check_tcg_memory_orders_compatible();
#else
        return false;
#endif
    }
}

void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > hosts");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                warn_report("Guest not yet converted to MTTCG - "
                            "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    warn_report("Guest expects a stronger memory ordering "
                                "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}

/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return (cpu->icount_budget -
            (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
static void cpu_update_icount_locked(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

    atomic_set_i64(&timers_state.qemu_icount,
                   timers_state.qemu_icount + executed);
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cpu_update_icount_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static int64_t cpu_get_icount_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return atomic_read_i64(&timers_state.qemu_icount);
}

static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw_locked();
    return atomic_read_i64(&timers_state.qemu_icount_bias) +
           cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << atomic_read(&timers_state.icount_time_shift);
}
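
/*
 * For example, with the default icount_time_shift of 3 each emulated
 * instruction accounts for 8 ns of virtual time, i.e. a nominal guest
 * speed of 125 MIPS; icount_adjust() below raises or lowers the shift
 * to keep virtual time tracking real time.
 */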

static int64_t cpu_get_ticks_locked(void)
{
    int64_t ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Non increasing ticks may happen if the host uses software suspend. */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    qemu_spin_lock(&timers_state.vm_clock_lock);
    ticks = cpu_get_ticks_locked();
    qemu_spin_unlock(&timers_state.vm_clock_lock);
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift + 1);
    }
    last_delta = delta;
    atomic_set_i64(&timers_state.qemu_icount_bias,
                   cur_icount - (timers_state.qemu_icount
                                 << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}
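
/*
 * Note that rewriting the bias as cur_icount - (qemu_icount << shift)
 * keeps cpu_get_icount_locked(), which computes
 * bias + (qemu_icount << shift), continuous across a shift change:
 * only the rate at which virtual time advances changes, not its
 * current value.
 */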

static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    int shift = atomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock,
                           &timers_state.vm_clock_lock);
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp);
        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                             &timers_state.vm_clock_lock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount.  */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /* vCPU is sleeping and warp can't be started.
               It is probably a race condition: notification sent
               to vCPU was processed in advance and vCPU went to sleep.
               Therefore we have to wake it up for doing something. */
            if (replay_has_checkpoint()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            atomic_set_i64(&timers_state.qemu_icount_bias,
                           timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

static bool warp_timer_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_warp_timer != NULL;
}

static bool adjust_timers_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_rt_timer != NULL;
}

/*
 * Subsection for warp timer migration is optional, because it may not
 * be created.
 */
static const VMStateDescription icount_vmstate_warp_timer = {
    .name = "timer/icount/warp_timer",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = warp_timer_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(vm_clock_warp_start, TimersState),
        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription icount_vmstate_adjust_timers = {
    .name = "timer/icount/timers",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = adjust_timers_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_warp_timer,
        &icount_vmstate_adjust_timers,
        NULL
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_UNUSED(8),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    qemu_spin_init(&timers_state.vm_clock_lock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        timers_state.icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    timers_state.icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU.  If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness ends.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

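/*
 * Kicking the current round-robin vCPU races with the scheduling loop
 * moving tcg_current_rr_cpu on to the next vCPU, so the kicker below
 * re-reads the pointer and retries until it observes a stable value.
 */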
/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (qemu_in_vcpu_thread()) {
        /* A CPU is currently running; kick it back out to the
         * tcg_cpu_exec() loop so it will recalculate its
         * icount deadline immediately.
         */
        qemu_cpu_kick(current_cpu);
    } else if (first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         * If we have no CPUs at all for some reason, we don't
         * need to do anything.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}

static void start_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
    }
    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
        timer_del(tcg_kick_vcpu_timer);
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
        /* TODO: move to cpu_synchronize_state() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_state(cpu);
        }
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
        /* TODO: move to cpu_synchronize_post_reset() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_reset(cpu);
        }
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
        /* TODO: move to cpu_synchronize_post_init() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_init(cpu);
        }
    }
}

void cpu_synchronize_all_pre_loadvm(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_pre_loadvm(cpu);
    }
}

static int do_vm_stop(RunState state, bool send_stop)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        if (send_stop) {
            qapi_event_send_stop();
        }
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

/* Special vm_stop() variant for terminating the process.  Historically clients
 * did not expect a QMP STOP event and so we need to retain compatibility.
 */
int vm_shutdown(void)
{
    return do_vm_stop(RUN_STATE_SHUTDOWN, false);
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
    g_assert(qemu_cpu_is_self(cpu));
    cpu->stop = false;
    cpu->stopped = true;
    if (exit) {
        cpu_exit(cpu);
    }
    qemu_cond_broadcast(&qemu_pause_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        qemu_cpu_stop(cpu, false);
    }
    process_queued_cpu_work(cpu);
}

static void qemu_tcg_rr_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        stop_tcg_kick_timer();
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

#ifdef _WIN32
    /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
    if (!tcg_enabled()) {
        SleepEx(0, TRUE);
    }
#endif
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        error_report("kvm_init_vcpu failed: %s", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    error_report("qtest is not supported under Windows");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug);

    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}
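
/*
 * qemu_icount_round() turns the nanosecond deadline into an instruction
 * budget, rounding up: with icount_time_shift == 3, a 1 ms deadline
 * (1000000 ns) yields a budget of 125000 instructions.
 */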

static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu_neg(cpu)->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;

        replay_mutex_lock();
    }
}
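
/*
 * The budget is split in two because the TCG fast path only decrements
 * the 16-bit u16.low counter; a budget of e.g. 125000 instructions is
 * loaded as u16.low = 65535 and icount_extra = 59465, and the execution
 * loop refills u16.low from icount_extra as it drains.
 */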
1404
1405static void process_icount_data(CPUState *cpu)
1406{
1be7fcb8 1407 if (use_icount) {
e4cd9657 1408 /* Account for executed instructions */
512d3c80 1409 cpu_update_icount(cpu);
05248382
AB
1410
1411 /* Reset the counters */
5e140196 1412 cpu_neg(cpu)->icount_decr.u16.low = 0;
1be7fcb8 1413 cpu->icount_extra = 0;
e4cd9657
AB
1414 cpu->icount_budget = 0;
1415
1be7fcb8 1416 replay_account_executed_instructions();
d759c951
AB
1417
1418 replay_mutex_unlock();
1be7fcb8 1419 }
05248382
AB
1420}
1421
1422
1423static int tcg_cpu_exec(CPUState *cpu)
1424{
1425 int ret;
1426#ifdef CONFIG_PROFILER
1427 int64_t ti;
1428#endif
1429
f28d0dfd 1430 assert(tcg_enabled());
05248382
AB
1431#ifdef CONFIG_PROFILER
1432 ti = profile_getclock();
1433#endif
05248382
AB
1434 cpu_exec_start(cpu);
1435 ret = cpu_exec(cpu);
1436 cpu_exec_end(cpu);
05248382 1437#ifdef CONFIG_PROFILER
72fd2efb
EC
1438 atomic_set(&tcg_ctx->prof.cpu_exec_time,
1439 tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
05248382 1440#endif
1be7fcb8
AB
1441 return ret;
1442}
1443
c93bbbef
AB
1444/* Destroy any remaining vCPUs which have been unplugged and have
1445 * finished running
1446 */
1447static void deal_with_unplugged_cpus(void)
1be7fcb8 1448{
c93bbbef 1449 CPUState *cpu;
1be7fcb8 1450
c93bbbef
AB
1451 CPU_FOREACH(cpu) {
1452 if (cpu->unplug && !cpu_can_run(cpu)) {
1453 qemu_tcg_destroy_vcpu(cpu);
1454 cpu->created = false;
1455 qemu_cond_signal(&qemu_cpu_cond);
1be7fcb8
AB
1456 break;
1457 }
1458 }
1be7fcb8 1459}
bdb7ca67 1460
6546706d
AB
1461/* Single-threaded TCG
1462 *
1463 * In the single-threaded case each vCPU is simulated in turn. If
1464 * there is more than a single vCPU we create a simple timer to kick
1465 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1466 * This is done explicitly rather than relying on side-effects
1467 * elsewhere.
1468 */
1469
37257942 1470static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
296af7c9 1471{
c3586ba7 1472 CPUState *cpu = arg;
296af7c9 1473
f28d0dfd 1474 assert(tcg_enabled());
ab28bd23 1475 rcu_register_thread();
3468b59e 1476 tcg_register_thread();
ab28bd23 1477
2e7f7a3c 1478 qemu_mutex_lock_iothread();
814e612e 1479 qemu_thread_get_self(cpu->thread);
296af7c9 1480
5a9c973b
DH
1481 cpu->thread_id = qemu_get_thread_id();
1482 cpu->created = true;
1483 cpu->can_do_io = 1;
296af7c9 1484 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1485 qemu_guest_random_seed_thread_part2(cpu->random_seed);
296af7c9 1486
fa7d1867 1487 /* wait for initial kick-off after machine start */
c28e399c 1488 while (first_cpu->stopped) {
d5f8d613 1489 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1490
1491 /* process any pending work */
bdc44640 1492 CPU_FOREACH(cpu) {
37257942 1493 current_cpu = cpu;
182735ef 1494 qemu_wait_io_event_common(cpu);
8e564b4e 1495 }
0ab07c62 1496 }
296af7c9 1497
6546706d
AB
1498 start_tcg_kick_timer();
1499
c93bbbef
AB
1500 cpu = first_cpu;
1501
e5143e30
AB
1502 /* process any pending work */
1503 cpu->exit_request = 1;
1504
296af7c9 1505 while (1) {
d759c951
AB
1506 qemu_mutex_unlock_iothread();
1507 replay_mutex_lock();
1508 qemu_mutex_lock_iothread();
c93bbbef
AB
1509 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1510 qemu_account_warp_timer();
1511
6b8f0187
PB
1512 /* Run the timers here. This is much more efficient than
1513 * waking up the I/O thread and waiting for completion.
1514 */
1515 handle_icount_deadline();
1516
d759c951
AB
1517 replay_mutex_unlock();
1518
c93bbbef
AB
1519 if (!cpu) {
1520 cpu = first_cpu;
1521 }
1522
e5143e30
AB
1523 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1524
791158d9 1525 atomic_mb_set(&tcg_current_rr_cpu, cpu);
37257942 1526 current_cpu = cpu;
c93bbbef
AB
1527
1528 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1529 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1530
1531 if (cpu_can_run(cpu)) {
1532 int r;
05248382 1533
d759c951 1534 qemu_mutex_unlock_iothread();
05248382
AB
1535 prepare_icount_for_run(cpu);
1536
c93bbbef 1537 r = tcg_cpu_exec(cpu);
05248382
AB
1538
1539 process_icount_data(cpu);
d759c951 1540 qemu_mutex_lock_iothread();
05248382 1541
c93bbbef
AB
1542 if (r == EXCP_DEBUG) {
1543 cpu_handle_guest_debug(cpu);
1544 break;
08e73c48
PK
1545 } else if (r == EXCP_ATOMIC) {
1546 qemu_mutex_unlock_iothread();
1547 cpu_exec_step_atomic(cpu);
1548 qemu_mutex_lock_iothread();
1549 break;
c93bbbef 1550 }
37257942 1551 } else if (cpu->stop) {
c93bbbef
AB
1552 if (cpu->unplug) {
1553 cpu = CPU_NEXT(cpu);
1554 }
1555 break;
1556 }
1557
e5143e30
AB
1558 cpu = CPU_NEXT(cpu);
1559 } /* while (cpu && !cpu->exit_request).. */
1560
791158d9
AB
1561 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1562 atomic_set(&tcg_current_rr_cpu, NULL);
c93bbbef 1563
e5143e30
AB
1564 if (cpu && cpu->exit_request) {
1565 atomic_mb_set(&cpu->exit_request, 0);
1566 }
ac70aafc 1567
013aabdc
CD
1568 if (use_icount && all_cpu_threads_idle()) {
1569 /*
1570 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1571 * in the main_loop, wake it up in order to start the warp timer.
1572 */
1573 qemu_notify_event();
1574 }
1575
a8efa606 1576 qemu_tcg_rr_wait_io_event();
c93bbbef 1577 deal_with_unplugged_cpus();
296af7c9
BS
1578 }
1579
9b0605f9 1580 rcu_unregister_thread();
296af7c9
BS
1581 return NULL;
1582}
1583
b0cb0a66
VP
1584static void *qemu_hax_cpu_thread_fn(void *arg)
1585{
1586 CPUState *cpu = arg;
1587 int r;
b3d3a426 1588
9857c2d2 1589 rcu_register_thread();
b3d3a426 1590 qemu_mutex_lock_iothread();
b0cb0a66 1591 qemu_thread_get_self(cpu->thread);
b0cb0a66
VP
1592
1593 cpu->thread_id = qemu_get_thread_id();
1594 cpu->created = true;
b0cb0a66
VP
1595 current_cpu = cpu;
1596
1597 hax_init_vcpu(cpu);
1598 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1599 qemu_guest_random_seed_thread_part2(cpu->random_seed);
b0cb0a66 1600
9857c2d2 1601 do {
b0cb0a66
VP
1602 if (cpu_can_run(cpu)) {
1603 r = hax_smp_cpu_exec(cpu);
1604 if (r == EXCP_DEBUG) {
1605 cpu_handle_guest_debug(cpu);
1606 }
1607 }
1608
db08b687 1609 qemu_wait_io_event(cpu);
9857c2d2
PB
1610 } while (!cpu->unplug || cpu_can_run(cpu));
1611 rcu_unregister_thread();
b0cb0a66
VP
1612 return NULL;
1613}
1614
c97d6d2c
SAGDR
1615/* The HVF-specific vCPU thread function. This one should only run when the host
1616 * CPU supports the VMX "unrestricted guest" feature. */
1617static void *qemu_hvf_cpu_thread_fn(void *arg)
1618{
1619 CPUState *cpu = arg;
1620
1621 int r;
1622
1623 assert(hvf_enabled());
1624
1625 rcu_register_thread();
1626
1627 qemu_mutex_lock_iothread();
1628 qemu_thread_get_self(cpu->thread);
1629
1630 cpu->thread_id = qemu_get_thread_id();
1631 cpu->can_do_io = 1;
1632 current_cpu = cpu;
1633
1634 hvf_init_vcpu(cpu);
1635
1636 /* signal CPU creation */
1637 cpu->created = true;
1638 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1639 qemu_guest_random_seed_thread_part2(cpu->random_seed);
c97d6d2c
SAGDR
1640
1641 do {
1642 if (cpu_can_run(cpu)) {
1643 r = hvf_vcpu_exec(cpu);
1644 if (r == EXCP_DEBUG) {
1645 cpu_handle_guest_debug(cpu);
1646 }
1647 }
db08b687 1648 qemu_wait_io_event(cpu);
c97d6d2c
SAGDR
1649 } while (!cpu->unplug || cpu_can_run(cpu));
1650
1651 hvf_vcpu_destroy(cpu);
1652 cpu->created = false;
1653 qemu_cond_signal(&qemu_cpu_cond);
1654 qemu_mutex_unlock_iothread();
8178e637 1655 rcu_unregister_thread();
c97d6d2c
SAGDR
1656 return NULL;
1657}
1658
19306806
JTV
1659static void *qemu_whpx_cpu_thread_fn(void *arg)
1660{
1661 CPUState *cpu = arg;
1662 int r;
1663
1664 rcu_register_thread();
1665
1666 qemu_mutex_lock_iothread();
1667 qemu_thread_get_self(cpu->thread);
1668 cpu->thread_id = qemu_get_thread_id();
1669 current_cpu = cpu;
1670
1671 r = whpx_init_vcpu(cpu);
1672 if (r < 0) {
1673 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1674 exit(1);
1675 }
1676
1677 /* signal CPU creation */
1678 cpu->created = true;
1679 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1680 qemu_guest_random_seed_thread_part2(cpu->random_seed);
19306806
JTV
1681
1682 do {
1683 if (cpu_can_run(cpu)) {
1684 r = whpx_vcpu_exec(cpu);
1685 if (r == EXCP_DEBUG) {
1686 cpu_handle_guest_debug(cpu);
1687 }
1688 }
1689 while (cpu_thread_is_idle(cpu)) {
1690 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1691 }
1692 qemu_wait_io_event_common(cpu);
1693 } while (!cpu->unplug || cpu_can_run(cpu));
1694
1695 whpx_destroy_vcpu(cpu);
1696 cpu->created = false;
1697 qemu_cond_signal(&qemu_cpu_cond);
1698 qemu_mutex_unlock_iothread();
1699 rcu_unregister_thread();
c97d6d2c
SAGDR
1700 return NULL;
1701}
1702
b0cb0a66
VP
1703#ifdef _WIN32
1704static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1705{
1706}
1707#endif
1708
37257942
AB
1709/* Multi-threaded TCG
1710 *
1711 * In the multi-threaded case each vCPU has its own thread. The TLS
1712 * variable current_cpu can be used deep in the code to find the
1713 * current CPUState for a given thread.
1714 */
1715
1716static void *qemu_tcg_cpu_thread_fn(void *arg)
1717{
1718 CPUState *cpu = arg;
1719
f28d0dfd 1720 assert(tcg_enabled());
bf51c720
AB
1721 g_assert(!use_icount);
1722
37257942 1723 rcu_register_thread();
3468b59e 1724 tcg_register_thread();
37257942
AB
1725
1726 qemu_mutex_lock_iothread();
1727 qemu_thread_get_self(cpu->thread);
1728
1729 cpu->thread_id = qemu_get_thread_id();
1730 cpu->created = true;
1731 cpu->can_do_io = 1;
1732 current_cpu = cpu;
1733 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1734 qemu_guest_random_seed_thread_part2(cpu->random_seed);
37257942
AB
1735
1736 /* process any pending work */
1737 cpu->exit_request = 1;
1738
54961aac 1739 do {
37257942
AB
1740 if (cpu_can_run(cpu)) {
1741 int r;
d759c951 1742 qemu_mutex_unlock_iothread();
37257942 1743 r = tcg_cpu_exec(cpu);
d759c951 1744 qemu_mutex_lock_iothread();
37257942
AB
1745 switch (r) {
1746 case EXCP_DEBUG:
1747 cpu_handle_guest_debug(cpu);
1748 break;
1749 case EXCP_HALTED:
1750 /* during start-up the vCPU is reset and the thread is
1751 * kicked several times. If we don't ensure we go back
1752 * to sleep in the halted state we won't cleanly
1753 * start-up when the vCPU is enabled.
1754 *
1755 * cpu->halted should ensure we sleep in wait_io_event
1756 */
1757 g_assert(cpu->halted);
1758 break;
08e73c48
PK
1759 case EXCP_ATOMIC:
1760 qemu_mutex_unlock_iothread();
1761 cpu_exec_step_atomic(cpu);
1762 qemu_mutex_lock_iothread();
37257942
AB
1763 default:
1764 /* Ignore everything else? */
1765 break;
1766 }
1767 }
1768
37257942 1769 atomic_mb_set(&cpu->exit_request, 0);
db08b687 1770 qemu_wait_io_event(cpu);
9b0605f9 1771 } while (!cpu->unplug || cpu_can_run(cpu));
37257942 1772
9b0605f9
PB
1773 qemu_tcg_destroy_vcpu(cpu);
1774 cpu->created = false;
1775 qemu_cond_signal(&qemu_cpu_cond);
1776 qemu_mutex_unlock_iothread();
1777 rcu_unregister_thread();
37257942
AB
1778 return NULL;
1779}
1780
2ff09a40 1781static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1782{
1783#ifndef _WIN32
1784 int err;
1785
e0c38211
PB
1786 if (cpu->thread_kicked) {
1787 return;
9102deda 1788 }
e0c38211 1789 cpu->thread_kicked = true;
814e612e 1790 err = pthread_kill(cpu->thread->thread, SIG_IPI);
d455ebc4 1791 if (err && err != ESRCH) {
cc015e9a
PB
fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
1793 exit(1);
1794 }
1795#else /* _WIN32 */
b0cb0a66 1796 if (!qemu_cpu_is_self(cpu)) {
19306806
JTV
1797 if (whpx_enabled()) {
1798 whpx_vcpu_kick(cpu);
1799 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
b0cb0a66
VP
1800 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1801 __func__, GetLastError());
1802 exit(1);
1803 }
1804 }
e0c38211
PB
1805#endif
1806}
ed9164a3 1807
c08d7424 1808void qemu_cpu_kick(CPUState *cpu)
296af7c9 1809{
f5c121b8 1810 qemu_cond_broadcast(cpu->halt_cond);
e0c38211 1811 if (tcg_enabled()) {
791158d9 1812 cpu_exit(cpu);
37257942 1813 /* NOP unless doing single-thread RR */
791158d9 1814 qemu_cpu_kick_rr_cpu();
e0c38211 1815 } else {
b0cb0a66
VP
1816 if (hax_enabled()) {
1817 /*
1818 * FIXME: race condition with the exit_request check in
1819 * hax_vcpu_hax_exec
1820 */
1821 cpu->exit_request = 1;
1822 }
e0c38211
PB
1823 qemu_cpu_kick_thread(cpu);
1824 }
296af7c9
BS
1825}
1826
46d62fac 1827void qemu_cpu_kick_self(void)
296af7c9 1828{
4917cf44 1829 assert(current_cpu);
9102deda 1830 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1831}
1832
60e82579 1833bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1834{
814e612e 1835 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1836}
1837
79e2b9ae 1838bool qemu_in_vcpu_thread(void)
aa723c23 1839{
4917cf44 1840 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1841}
1842
afbe7053
PB
1843static __thread bool iothread_locked = false;
1844
1845bool qemu_mutex_iothread_locked(void)
1846{
1847 return iothread_locked;
1848}
1849
cb764d06
EC
1850/*
1851 * The BQL is taken from so many places that it is worth profiling the
1852 * callers directly, instead of funneling them all through a single function.
1853 */
1854void qemu_mutex_lock_iothread_impl(const char *file, int line)
296af7c9 1855{
cb764d06
EC
1856 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1857
8d04fb55 1858 g_assert(!qemu_mutex_iothread_locked());
cb764d06 1859 bql_lock(&qemu_global_mutex, file, line);
afbe7053 1860 iothread_locked = true;
296af7c9
BS
1861}
1862
1863void qemu_mutex_unlock_iothread(void)
1864{
8d04fb55 1865 g_assert(qemu_mutex_iothread_locked());
afbe7053 1866 iothread_locked = false;
296af7c9
BS
1867 qemu_mutex_unlock(&qemu_global_mutex);
1868}
1869
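/*
 * Usage sketch (editorial, hypothetical function name): code running
 * outside the BQL brackets guest-state access with the helpers above.
 * qemu_mutex_lock_iothread() is a macro that forwards __FILE__/__LINE__
 * to qemu_mutex_lock_iothread_impl(), which is what lets the profiling
 * mentioned above attribute contention to individual call sites.
 */
static void hypothetical_touch_guest_state(void)
{
    g_assert(!qemu_mutex_iothread_locked());
    qemu_mutex_lock_iothread();
    /* ... access guest state safely under the BQL ... */
    qemu_mutex_unlock_iothread();
}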
e8faee06 1870static bool all_vcpus_paused(void)
296af7c9 1871{
bdc44640 1872 CPUState *cpu;
296af7c9 1873
bdc44640 1874 CPU_FOREACH(cpu) {
182735ef 1875 if (!cpu->stopped) {
e8faee06 1876 return false;
0ab07c62 1877 }
296af7c9
BS
1878 }
1879
e8faee06 1880 return true;
296af7c9
BS
1881}
1882
1883void pause_all_vcpus(void)
1884{
bdc44640 1885 CPUState *cpu;
296af7c9 1886
40daca54 1887 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1888 CPU_FOREACH(cpu) {
ebd05fea
DH
1889 if (qemu_cpu_is_self(cpu)) {
1890 qemu_cpu_stop(cpu, true);
1891 } else {
1892 cpu->stop = true;
1893 qemu_cpu_kick(cpu);
1894 }
d798e974
JK
1895 }
1896
d759c951
AB
1897 /* We need to drop the replay_lock so any vCPU threads woken up
1898 * can finish their replay tasks
1899 */
1900 replay_mutex_unlock();
1901
296af7c9 1902 while (!all_vcpus_paused()) {
be7d6c57 1903 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1904 CPU_FOREACH(cpu) {
182735ef 1905 qemu_cpu_kick(cpu);
296af7c9
BS
1906 }
1907 }
d759c951
AB
1908
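 /* Re-take the locks in the canonical order: the replay lock is taken
 * outside the BQL, so drop the BQL first, then reacquire both. */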
1909 qemu_mutex_unlock_iothread();
1910 replay_mutex_lock();
1911 qemu_mutex_lock_iothread();
296af7c9
BS
1912}
1913
2993683b
IM
1914void cpu_resume(CPUState *cpu)
1915{
1916 cpu->stop = false;
1917 cpu->stopped = false;
1918 qemu_cpu_kick(cpu);
1919}
1920
296af7c9
BS
1921void resume_all_vcpus(void)
1922{
bdc44640 1923 CPUState *cpu;
296af7c9 1924
40daca54 1925 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1926 CPU_FOREACH(cpu) {
182735ef 1927 cpu_resume(cpu);
296af7c9
BS
1928 }
1929}
1930
dbadee4f 1931void cpu_remove_sync(CPUState *cpu)
4c055ab5
GZ
1932{
1933 cpu->stop = true;
1934 cpu->unplug = true;
1935 qemu_cpu_kick(cpu);
dbadee4f
PB
1936 qemu_mutex_unlock_iothread();
1937 qemu_thread_join(cpu->thread);
1938 qemu_mutex_lock_iothread();
2c579042
BR
1939}
1940
4900116e
DDAG
1941/* Size of temporary buffers used to build a vCPU thread name */
1942#define VCPU_THREAD_NAME_SIZE 16
1943
e5ab30a2 1944static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1945{
4900116e 1946 char thread_name[VCPU_THREAD_NAME_SIZE];
37257942
AB
1947 static QemuCond *single_tcg_halt_cond;
1948 static QemuThread *single_tcg_cpu_thread;
e8feb96f
EC
1949 static int tcg_region_inited;
1950
f28d0dfd 1951 assert(tcg_enabled());
e8feb96f
EC
1952 /*
1953 * Initialize TCG regions, once only. Now is a good time, because:
1954 * (1) TCG's init context, prologue and target globals have been set up.
1955 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1956 * -accel flag is processed, so the check doesn't work then).
1957 */
1958 if (!tcg_region_inited) {
1959 tcg_region_inited = 1;
1960 tcg_region_init();
1961 }
4900116e 1962
37257942 1963 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
814e612e 1964 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1965 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1966 qemu_cond_init(cpu->halt_cond);
37257942
AB
1967
1968 if (qemu_tcg_mttcg_enabled()) {
1969 /* create a thread per vCPU with TCG (MTTCG) */
1970 parallel_cpus = true;
1971 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
4900116e 1972 cpu->cpu_index);
37257942
AB
1973
1974 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1975 cpu, QEMU_THREAD_JOINABLE);
1976
1977 } else {
1978 /* share a single thread for all cpus with TCG */
1979 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1980 qemu_thread_create(cpu->thread, thread_name,
1981 qemu_tcg_rr_cpu_thread_fn,
1982 cpu, QEMU_THREAD_JOINABLE);
1983
1984 single_tcg_halt_cond = cpu->halt_cond;
1985 single_tcg_cpu_thread = cpu->thread;
1986 }
1ecf47bf 1987#ifdef _WIN32
814e612e 1988 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1989#endif
296af7c9 1990 } else {
37257942
AB
1991 /* For non-MTTCG cases we share the thread */
1992 cpu->thread = single_tcg_cpu_thread;
1993 cpu->halt_cond = single_tcg_halt_cond;
a342173a
DH
1994 cpu->thread_id = first_cpu->thread_id;
1995 cpu->can_do_io = 1;
1996 cpu->created = true;
296af7c9
BS
1997 }
1998}
1999
b0cb0a66
VP
2000static void qemu_hax_start_vcpu(CPUState *cpu)
2001{
2002 char thread_name[VCPU_THREAD_NAME_SIZE];
2003
2004 cpu->thread = g_malloc0(sizeof(QemuThread));
2005 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2006 qemu_cond_init(cpu->halt_cond);
2007
2008 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
2009 cpu->cpu_index);
2010 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
2011 cpu, QEMU_THREAD_JOINABLE);
2012#ifdef _WIN32
2013 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2014#endif
b0cb0a66
VP
2015}
2016
48a106bd 2017static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 2018{
4900116e
DDAG
2019 char thread_name[VCPU_THREAD_NAME_SIZE];
2020
814e612e 2021 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
2022 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2023 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
2024 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2025 cpu->cpu_index);
2026 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2027 cpu, QEMU_THREAD_JOINABLE);
296af7c9
BS
2028}
2029
c97d6d2c
SAGDR
2030static void qemu_hvf_start_vcpu(CPUState *cpu)
2031{
2032 char thread_name[VCPU_THREAD_NAME_SIZE];
2033
2034 /* HVF currently does not support TCG, and only runs in
2035 * unrestricted-guest mode. */
2036 assert(hvf_enabled());
2037
2038 cpu->thread = g_malloc0(sizeof(QemuThread));
2039 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2040 qemu_cond_init(cpu->halt_cond);
2041
2042 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2043 cpu->cpu_index);
2044 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2045 cpu, QEMU_THREAD_JOINABLE);
c97d6d2c
SAGDR
2046}
2047
19306806
JTV
2048static void qemu_whpx_start_vcpu(CPUState *cpu)
2049{
2050 char thread_name[VCPU_THREAD_NAME_SIZE];
2051
2052 cpu->thread = g_malloc0(sizeof(QemuThread));
2053 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2054 qemu_cond_init(cpu->halt_cond);
2055 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2056 cpu->cpu_index);
2057 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2058 cpu, QEMU_THREAD_JOINABLE);
2059#ifdef _WIN32
2060 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2061#endif
19306806
JTV
2062}
2063
10a9021d 2064static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 2065{
4900116e
DDAG
2066 char thread_name[VCPU_THREAD_NAME_SIZE];
2067
814e612e 2068 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
2069 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2070 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
2071 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2072 cpu->cpu_index);
2073 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 2074 QEMU_THREAD_JOINABLE);
c7f0f3b1
AL
2075}
2076
c643bed9 2077void qemu_init_vcpu(CPUState *cpu)
296af7c9 2078{
5cc8767d
LX
2079 MachineState *ms = MACHINE(qdev_get_machine());
2080
2081 cpu->nr_cores = ms->smp.cores;
2082 cpu->nr_threads = ms->smp.threads;
f324e766 2083 cpu->stopped = true;
9c09a251 2084 cpu->random_seed = qemu_guest_random_seed_thread_part1();
56943e8c
PM
2085
2086 if (!cpu->as) {
2087 /* If the target cpu hasn't set up any address spaces itself,
2088 * give it the default one.
2089 */
12ebc9a7 2090 cpu->num_ases = 1;
80ceb07a 2091 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
56943e8c
PM
2092 }
2093
0ab07c62 2094 if (kvm_enabled()) {
48a106bd 2095 qemu_kvm_start_vcpu(cpu);
b0cb0a66
VP
2096 } else if (hax_enabled()) {
2097 qemu_hax_start_vcpu(cpu);
c97d6d2c
SAGDR
2098 } else if (hvf_enabled()) {
2099 qemu_hvf_start_vcpu(cpu);
c7f0f3b1 2100 } else if (tcg_enabled()) {
e5ab30a2 2101 qemu_tcg_init_vcpu(cpu);
19306806
JTV
2102 } else if (whpx_enabled()) {
2103 qemu_whpx_start_vcpu(cpu);
c7f0f3b1 2104 } else {
10a9021d 2105 qemu_dummy_start_vcpu(cpu);
0ab07c62 2106 }
81e96311
DH
2107
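 /* Handshake with the new thread: each *_cpu_thread_fn sets
 * cpu->created and signals qemu_cpu_cond once it is ready. */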
2108 while (!cpu->created) {
2109 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2110 }
296af7c9
BS
2111}
2112
b4a3d965 2113void cpu_stop_current(void)
296af7c9 2114{
4917cf44 2115 if (current_cpu) {
0ec7e677
PM
2116 current_cpu->stop = true;
2117 cpu_exit(current_cpu);
b4a3d965 2118 }
296af7c9
BS
2119}
2120
56983463 2121int vm_stop(RunState state)
296af7c9 2122{
aa723c23 2123 if (qemu_in_vcpu_thread()) {
74892d24 2124 qemu_system_vmstop_request_prepare();
1dfb4dd9 2125 qemu_system_vmstop_request(state);
296af7c9
BS
2126 /*
2127 * FIXME: should not return to device code in case
2128 * vm_stop() has been requested.
2129 */
b4a3d965 2130 cpu_stop_current();
56983463 2131 return 0;
296af7c9 2132 }
56983463 2133
4486e89c 2134 return do_vm_stop(state, true);
296af7c9
BS
2135}
2136
2d76e823
CI
2137/**
2138 * Prepare for (re)starting the VM.
2139 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2140 * running or in case of an error condition), 0 otherwise.
2141 */
2142int vm_prepare_start(void)
2143{
2144 RunState requested;
2d76e823
CI
2145
2146 qemu_vmstop_requested(&requested);
2147 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2148 return -1;
2149 }
2150
2151 /* Ensure that a STOP/RESUME pair of events is emitted if a
2152 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2153 * example, is documented to always be followed by the STOP
2154 * event.
2155 */
2156 if (runstate_is_running()) {
3ab72385
PX
2157 qapi_event_send_stop();
2158 qapi_event_send_resume();
f056158d 2159 return -1;
2d76e823
CI
2160 }
2161
2162 /* We are sending this now, but the CPUs will be resumed shortly afterwards */
3ab72385 2163 qapi_event_send_resume();
f056158d
MA
2164
2165 replay_enable_events();
2166 cpu_enable_ticks();
2167 runstate_set(RUN_STATE_RUNNING);
2168 vm_state_notify(1, RUN_STATE_RUNNING);
2169 return 0;
2d76e823
CI
2170}
2171
2172void vm_start(void)
2173{
2174 if (!vm_prepare_start()) {
2175 resume_all_vcpus();
2176 }
2177}
2178
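/*
 * Illustrative sketch (editorial, hypothetical caller): code that must do
 * work after the RESUME event is sent but before the vCPUs actually run
 * can use the two halves of vm_start() separately:
 */
static void hypothetical_start_with_fixups(void)
{
    if (vm_prepare_start() == 0) {
        /* ... adjust device or CPU state while vCPUs are still paused ... */
        resume_all_vcpus();
    }
}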
8a9236f1
LC
2179/* Does a state transition even if the VM is already stopped;
2180 the current state is forgotten forever */
56983463 2181int vm_stop_force_state(RunState state)
8a9236f1
LC
2182{
2183 if (runstate_is_running()) {
56983463 2184 return vm_stop(state);
8a9236f1
LC
2185 } else {
2186 runstate_set(state);
b2780d32
WC
2187
2188 bdrv_drain_all();
594a45ce
KW
2189 /* Make sure to return an error if the flush in a previous vm_stop()
2190 * failed. */
22af08ea 2191 return bdrv_flush_all();
8a9236f1
LC
2192 }
2193}
2194
0442428a 2195void list_cpus(const char *optarg)
262353cb
BS
2196{
2197 /* XXX: implement xxx_cpu_list for targets that still lack it */
e916cbf8 2198#if defined(cpu_list)
0442428a 2199 cpu_list();
262353cb
BS
2200#endif
2201}
de0b36b6 2202
0cfd6a9a
LC
2203void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2204 bool has_cpu, int64_t cpu_index, Error **errp)
2205{
2206 FILE *f;
2207 uint32_t l;
55e5c285 2208 CPUState *cpu;
0cfd6a9a 2209 uint8_t buf[1024];
0dc9daf0 2210 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
2211
2212 if (!has_cpu) {
2213 cpu_index = 0;
2214 }
2215
151d1322
AF
2216 cpu = qemu_get_cpu(cpu_index);
2217 if (cpu == NULL) {
c6bd8c70
MA
2218 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2219 "a CPU number");
0cfd6a9a
LC
2220 return;
2221 }
2222
2223 f = fopen(filename, "wb");
2224 if (!f) {
618da851 2225 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
2226 return;
2227 }
2228
2229 while (size != 0) {
2230 l = sizeof(buf);
2231 if (l > size)
2232 l = size;
2f4d0f59 2233 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
2234 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2235 " specified", orig_addr, orig_size);
2f4d0f59
AK
2236 goto exit;
2237 }
0cfd6a9a 2238 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 2239 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
2240 goto exit;
2241 }
2242 addr += l;
2243 size -= l;
2244 }
2245
2246exit:
2247 fclose(f);
2248}
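
/*
 * Usage note (editorial): the function above backs the QMP "memsave"
 * command; on the wire a request looks roughly like
 *   { "execute": "memsave",
 *     "arguments": { "val": 4096, "size": 1024,
 *                    "filename": "/tmp/vmem" } }
 * where "val" is a guest virtual address, read via cpu_memory_rw_debug().
 */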
6d3962bf
LC
2249
2250void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2251 Error **errp)
2252{
2253 FILE *f;
2254 uint32_t l;
2255 uint8_t buf[1024];
2256
2257 f = fopen(filename, "wb");
2258 if (!f) {
618da851 2259 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
2260 return;
2261 }
2262
2263 while (size != 0) {
2264 l = sizeof(buf);
2265 if (l > size)
2266 l = size;
eb6282f2 2267 cpu_physical_memory_read(addr, buf, l);
6d3962bf 2268 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 2269 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
2270 goto exit;
2271 }
2272 addr += l;
2273 size -= l;
2274 }
2275
2276exit:
2277 fclose(f);
2278}
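
/*
 * Note (editorial): unlike memsave above, pmemsave takes a guest
 * physical address, hence cpu_physical_memory_read() and the absence
 * of a cpu-index argument.
 */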
ab49ab5c
LC
2279
2280void qmp_inject_nmi(Error **errp)
2281{
9cb805fd 2282 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c 2283}
27498bef 2284
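/* Editorial note: a positive "Host - Guest clock" value below means the
 * guest's icount-driven virtual clock has fallen behind host time. */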
76c86615 2285void dump_drift_info(void)
27498bef
ST
2286{
2287 if (!use_icount) {
2288 return;
2289 }
2290
76c86615 2291 qemu_printf("Host - Guest clock %"PRIi64" ms\n",
27498bef
ST
2292 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2293 if (icount_align_option) {
76c86615
MA
2294 qemu_printf("Max guest delay %"PRIi64" ms\n",
2295 -max_delay / SCALE_MS);
2296 qemu_printf("Max guest advance %"PRIi64" ms\n",
2297 max_advance / SCALE_MS);
27498bef 2298 } else {
76c86615
MA
2299 qemu_printf("Max guest delay NA\n");
2300 qemu_printf("Max guest advance NA\n");
27498bef
ST
2301 }
2302}