]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
migration/qemu-file: fix potential buf waste for extra buf_index adjustment
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
7b31bbc2 25#include "qemu/osdep.h"
a8d25326 26#include "qemu-common.h"
8d4e9146 27#include "qemu/config-file.h"
d6454270 28#include "migration/vmstate.h"
83c9089e 29#include "monitor/monitor.h"
e688df6b 30#include "qapi/error.h"
112ed241 31#include "qapi/qapi-commands-misc.h"
9af23989 32#include "qapi/qapi-events-run-state.h"
a4e15de9 33#include "qapi/qmp/qerror.h"
d49b6836 34#include "qemu/error-report.h"
76c86615 35#include "qemu/qemu-print.h"
14a48c1d 36#include "sysemu/tcg.h"
da31d594 37#include "sysemu/block-backend.h"
022c62cb 38#include "exec/gdbstub.h"
9c17d615 39#include "sysemu/dma.h"
b3946626 40#include "sysemu/hw_accel.h"
9c17d615 41#include "sysemu/kvm.h"
b0cb0a66 42#include "sysemu/hax.h"
c97d6d2c 43#include "sysemu/hvf.h"
19306806 44#include "sysemu/whpx.h"
63c91552 45#include "exec/exec-all.h"
296af7c9 46
1de7afc9 47#include "qemu/thread.h"
9c17d615
PB
48#include "sysemu/cpus.h"
49#include "sysemu/qtest.h"
1de7afc9 50#include "qemu/main-loop.h"
922a01a0 51#include "qemu/option.h"
1de7afc9 52#include "qemu/bitmap.h"
cb365646 53#include "qemu/seqlock.h"
9c09a251 54#include "qemu/guest-random.h"
8d4e9146 55#include "tcg.h"
9cb805fd 56#include "hw/nmi.h"
8b427044 57#include "sysemu/replay.h"
54d31236 58#include "sysemu/runstate.h"
5cc8767d 59#include "hw/boards.h"
650d103d 60#include "hw/hw.h"
0ff0fc19 61
6d9cb73c
JK
62#ifdef CONFIG_LINUX
63
64#include <sys/prctl.h>
65
c0532a76
MT
66#ifndef PR_MCE_KILL
67#define PR_MCE_KILL 33
68#endif
69
6d9cb73c
JK
70#ifndef PR_MCE_KILL_SET
71#define PR_MCE_KILL_SET 1
72#endif
73
74#ifndef PR_MCE_KILL_EARLY
75#define PR_MCE_KILL_EARLY 1
76#endif
77
78#endif /* CONFIG_LINUX */
79
27498bef
ST
80int64_t max_delay;
81int64_t max_advance;
296af7c9 82
2adcc85d
JH
83/* vcpu throttling controls */
84static QEMUTimer *throttle_timer;
85static unsigned int throttle_percentage;
86
87#define CPU_THROTTLE_PCT_MIN 1
88#define CPU_THROTTLE_PCT_MAX 99
89#define CPU_THROTTLE_TIMESLICE_NS 10000000
90
321bc0b2
TC
91bool cpu_is_stopped(CPUState *cpu)
92{
93 return cpu->stopped || !runstate_is_running();
94}
95
a98ae1d8 96static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 97{
c64ca814 98 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
99 return false;
100 }
321bc0b2 101 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
102 return true;
103 }
8c2e1b00 104 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 105 kvm_halt_in_kernel()) {
ac873f1e
PM
106 return false;
107 }
108 return true;
109}
110
111static bool all_cpu_threads_idle(void)
112{
182735ef 113 CPUState *cpu;
ac873f1e 114
bdc44640 115 CPU_FOREACH(cpu) {
182735ef 116 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
117 return false;
118 }
119 }
120 return true;
121}
122
946fb27c
PB
123/***********************************************************/
124/* guest cycle counter */
125
a3270e19
PB
126/* Protected by TimersState seqlock */
127
5045e9d9 128static bool icount_sleep = true;
946fb27c
PB
129/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
130#define MAX_ICOUNT_SHIFT 10
a3270e19 131
946fb27c 132typedef struct TimersState {
cb365646 133 /* Protected by BQL. */
946fb27c
PB
134 int64_t cpu_ticks_prev;
135 int64_t cpu_ticks_offset;
cb365646 136
94377115
PB
137 /* Protect fields that can be respectively read outside the
138 * BQL, and written from multiple threads.
cb365646
LPF
139 */
140 QemuSeqLock vm_clock_seqlock;
94377115
PB
141 QemuSpin vm_clock_lock;
142
143 int16_t cpu_ticks_enabled;
c96778bb 144
c1ff073c 145 /* Conversion factor from emulated instructions to virtual clock ticks. */
94377115
PB
146 int16_t icount_time_shift;
147
c96778bb
FK
148 /* Compensate for varying guest execution speed. */
149 int64_t qemu_icount_bias;
94377115
PB
150
151 int64_t vm_clock_warp_start;
152 int64_t cpu_clock_offset;
153
c96778bb
FK
154 /* Only written by TCG thread */
155 int64_t qemu_icount;
94377115 156
b39e3f34 157 /* for adjusting icount */
b39e3f34
PD
158 QEMUTimer *icount_rt_timer;
159 QEMUTimer *icount_vm_timer;
160 QEMUTimer *icount_warp_timer;
946fb27c
PB
161} TimersState;
162
d9cd4007 163static TimersState timers_state;
8d4e9146
FK
164bool mttcg_enabled;
165
166/*
167 * We default to false if we know other options have been enabled
168 * which are currently incompatible with MTTCG. Otherwise when each
169 * guest (target) has been updated to support:
170 * - atomic instructions
171 * - memory ordering primitives (barriers)
172 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
173 *
174 * Once a guest architecture has been converted to the new primitives
175 * there are two remaining limitations to check.
176 *
177 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
178 * - The host must have a stronger memory order than the guest
179 *
180 * It may be possible in future to support strong guests on weak hosts
181 * but that will require tagging all load/stores in a guest with their
182 * implicit memory order requirements which would likely slow things
183 * down a lot.
184 */
185
/*
 * Check at build time whether the host memory ordering covers everything
 * the guest expects.  When either TCG_GUEST_DEFAULT_MO or
 * TCG_TARGET_DEFAULT_MO is not defined the answer is false.
 */
static bool check_tcg_memory_orders_compatible(void)
{
    bool compatible = false;

#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    /* No ordering bit may be required by the guest yet missing on the host. */
    compatible = (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#endif
    return compatible;
}
194
195static bool default_mttcg_enabled(void)
196{
83fd9629 197 if (use_icount || TCG_OVERSIZED_GUEST) {
8d4e9146
FK
198 return false;
199 } else {
200#ifdef TARGET_SUPPORTS_MTTCG
201 return check_tcg_memory_orders_compatible();
202#else
203 return false;
204#endif
205 }
206}
207
208void qemu_tcg_configure(QemuOpts *opts, Error **errp)
209{
210 const char *t = qemu_opt_get(opts, "thread");
211 if (t) {
212 if (strcmp(t, "multi") == 0) {
213 if (TCG_OVERSIZED_GUEST) {
214 error_setg(errp, "No MTTCG when guest word size > hosts");
83fd9629
AB
215 } else if (use_icount) {
216 error_setg(errp, "No MTTCG when icount is enabled");
8d4e9146 217 } else {
86953503 218#ifndef TARGET_SUPPORTS_MTTCG
0765691e
MA
219 warn_report("Guest not yet converted to MTTCG - "
220 "you may get unexpected results");
c34c7620 221#endif
8d4e9146 222 if (!check_tcg_memory_orders_compatible()) {
0765691e
MA
223 warn_report("Guest expects a stronger memory ordering "
224 "than the host provides");
8cfef892 225 error_printf("This may cause strange/hard to debug errors\n");
8d4e9146
FK
226 }
227 mttcg_enabled = true;
228 }
229 } else if (strcmp(t, "single") == 0) {
230 mttcg_enabled = false;
231 } else {
232 error_setg(errp, "Invalid 'thread' setting %s", t);
233 }
234 } else {
235 mttcg_enabled = default_mttcg_enabled();
236 }
237}
946fb27c 238
e4cd9657
AB
239/* The current number of executed instructions is based on what we
240 * originally budgeted minus the current state of the decrementing
241 * icount counters in extra/u16.low.
242 */
243static int64_t cpu_get_icount_executed(CPUState *cpu)
244{
5e140196
RH
245 return (cpu->icount_budget -
246 (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
e4cd9657
AB
247}
248
512d3c80
AB
249/*
250 * Update the global shared timer_state.qemu_icount to take into
251 * account executed instructions. This is done by the TCG vCPU
252 * thread so the main-loop can see time has moved forward.
253 */
9b4e6f49 254static void cpu_update_icount_locked(CPUState *cpu)
512d3c80
AB
255{
256 int64_t executed = cpu_get_icount_executed(cpu);
257 cpu->icount_budget -= executed;
258
38adcb6e
EC
259 atomic_set_i64(&timers_state.qemu_icount,
260 timers_state.qemu_icount + executed);
9b4e6f49
PB
261}
262
263/*
264 * Update the global shared timer_state.qemu_icount to take into
265 * account executed instructions. This is done by the TCG vCPU
266 * thread so the main-loop can see time has moved forward.
267 */
268void cpu_update_icount(CPUState *cpu)
269{
270 seqlock_write_lock(&timers_state.vm_clock_seqlock,
271 &timers_state.vm_clock_lock);
272 cpu_update_icount_locked(cpu);
94377115
PB
273 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
274 &timers_state.vm_clock_lock);
512d3c80
AB
275}
276
c1ff073c 277static int64_t cpu_get_icount_raw_locked(void)
946fb27c 278{
4917cf44 279 CPUState *cpu = current_cpu;
946fb27c 280
243c5f77 281 if (cpu && cpu->running) {
414b15c9 282 if (!cpu->can_do_io) {
493d89bf 283 error_report("Bad icount read");
2a62914b 284 exit(1);
946fb27c 285 }
e4cd9657 286 /* Take into account what has run */
9b4e6f49 287 cpu_update_icount_locked(cpu);
946fb27c 288 }
38adcb6e
EC
289 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
290 return atomic_read_i64(&timers_state.qemu_icount);
2a62914b
PD
291}
292
2a62914b
PD
293static int64_t cpu_get_icount_locked(void)
294{
c1ff073c 295 int64_t icount = cpu_get_icount_raw_locked();
c97595d1
EC
296 return atomic_read_i64(&timers_state.qemu_icount_bias) +
297 cpu_icount_to_ns(icount);
c1ff073c
PB
298}
299
300int64_t cpu_get_icount_raw(void)
301{
302 int64_t icount;
303 unsigned start;
304
305 do {
306 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
307 icount = cpu_get_icount_raw_locked();
308 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
309
310 return icount;
946fb27c
PB
311}
312
c1ff073c 313/* Return the virtual CPU time, based on the instruction counter. */
17a15f1b
PB
314int64_t cpu_get_icount(void)
315{
316 int64_t icount;
317 unsigned start;
318
319 do {
320 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
321 icount = cpu_get_icount_locked();
322 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
323
324 return icount;
325}
326
3f031313
FK
327int64_t cpu_icount_to_ns(int64_t icount)
328{
c1ff073c 329 return icount << atomic_read(&timers_state.icount_time_shift);
3f031313
FK
330}
331
f2a4ad6d
PB
332static int64_t cpu_get_ticks_locked(void)
333{
334 int64_t ticks = timers_state.cpu_ticks_offset;
335 if (timers_state.cpu_ticks_enabled) {
336 ticks += cpu_get_host_ticks();
337 }
338
339 if (timers_state.cpu_ticks_prev > ticks) {
340 /* Non increasing ticks may happen if the host uses software suspend. */
341 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
342 ticks = timers_state.cpu_ticks_prev;
343 }
344
345 timers_state.cpu_ticks_prev = ticks;
346 return ticks;
347}
348
d90f3cca
C
349/* return the time elapsed in VM between vm_start and vm_stop. Unless
350 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
351 * counter.
d90f3cca 352 */
946fb27c
PB
353int64_t cpu_get_ticks(void)
354{
5f3e3101
PB
355 int64_t ticks;
356
946fb27c
PB
357 if (use_icount) {
358 return cpu_get_icount();
359 }
5f3e3101 360
f2a4ad6d
PB
361 qemu_spin_lock(&timers_state.vm_clock_lock);
362 ticks = cpu_get_ticks_locked();
363 qemu_spin_unlock(&timers_state.vm_clock_lock);
5f3e3101 364 return ticks;
946fb27c
PB
365}
366
cb365646 367static int64_t cpu_get_clock_locked(void)
946fb27c 368{
1d45cea5 369 int64_t time;
cb365646 370
1d45cea5 371 time = timers_state.cpu_clock_offset;
5f3e3101 372 if (timers_state.cpu_ticks_enabled) {
1d45cea5 373 time += get_clock();
946fb27c 374 }
cb365646 375
1d45cea5 376 return time;
cb365646
LPF
377}
378
d90f3cca 379/* Return the monotonic time elapsed in VM, i.e.,
8212ff86
PM
380 * the time between vm_start and vm_stop
381 */
cb365646
LPF
382int64_t cpu_get_clock(void)
383{
384 int64_t ti;
385 unsigned start;
386
387 do {
388 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
389 ti = cpu_get_clock_locked();
390 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
391
392 return ti;
946fb27c
PB
393}
394
cb365646 395/* enable cpu_get_ticks()
3224e878 396 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 397 */
946fb27c
PB
398void cpu_enable_ticks(void)
399{
94377115
PB
400 seqlock_write_lock(&timers_state.vm_clock_seqlock,
401 &timers_state.vm_clock_lock);
946fb27c 402 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 403 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
404 timers_state.cpu_clock_offset -= get_clock();
405 timers_state.cpu_ticks_enabled = 1;
406 }
94377115
PB
407 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
408 &timers_state.vm_clock_lock);
946fb27c
PB
409}
410
411/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646 412 * cpu_get_ticks() after that.
3224e878 413 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 414 */
946fb27c
PB
415void cpu_disable_ticks(void)
416{
94377115
PB
417 seqlock_write_lock(&timers_state.vm_clock_seqlock,
418 &timers_state.vm_clock_lock);
946fb27c 419 if (timers_state.cpu_ticks_enabled) {
4a7428c5 420 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 421 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
422 timers_state.cpu_ticks_enabled = 0;
423 }
94377115
PB
424 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
425 &timers_state.vm_clock_lock);
946fb27c
PB
426}
427
428/* Correlation between real and virtual time is always going to be
429 fairly approximate, so ignore small variation.
430 When the guest is idle real and virtual time will be aligned in
431 the IO wait loop. */
73bcb24d 432#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
433
434static void icount_adjust(void)
435{
436 int64_t cur_time;
437 int64_t cur_icount;
438 int64_t delta;
a3270e19
PB
439
440 /* Protected by TimersState mutex. */
946fb27c 441 static int64_t last_delta;
468cc7cf 442
946fb27c
PB
443 /* If the VM is not running, then do nothing. */
444 if (!runstate_is_running()) {
445 return;
446 }
468cc7cf 447
94377115
PB
448 seqlock_write_lock(&timers_state.vm_clock_seqlock,
449 &timers_state.vm_clock_lock);
17a15f1b
PB
450 cur_time = cpu_get_clock_locked();
451 cur_icount = cpu_get_icount_locked();
468cc7cf 452
946fb27c
PB
453 delta = cur_icount - cur_time;
454 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
455 if (delta > 0
456 && last_delta + ICOUNT_WOBBLE < delta * 2
c1ff073c 457 && timers_state.icount_time_shift > 0) {
946fb27c 458 /* The guest is getting too far ahead. Slow time down. */
c1ff073c
PB
459 atomic_set(&timers_state.icount_time_shift,
460 timers_state.icount_time_shift - 1);
946fb27c
PB
461 }
462 if (delta < 0
463 && last_delta - ICOUNT_WOBBLE > delta * 2
c1ff073c 464 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
946fb27c 465 /* The guest is getting too far behind. Speed time up. */
c1ff073c
PB
466 atomic_set(&timers_state.icount_time_shift,
467 timers_state.icount_time_shift + 1);
946fb27c
PB
468 }
469 last_delta = delta;
c97595d1
EC
470 atomic_set_i64(&timers_state.qemu_icount_bias,
471 cur_icount - (timers_state.qemu_icount
472 << timers_state.icount_time_shift));
94377115
PB
473 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
474 &timers_state.vm_clock_lock);
946fb27c
PB
475}
476
477static void icount_adjust_rt(void *opaque)
478{
b39e3f34 479 timer_mod(timers_state.icount_rt_timer,
1979b908 480 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
481 icount_adjust();
482}
483
484static void icount_adjust_vm(void *opaque)
485{
b39e3f34 486 timer_mod(timers_state.icount_vm_timer,
40daca54 487 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 488 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
489 icount_adjust();
490}
491
492static int64_t qemu_icount_round(int64_t count)
493{
c1ff073c
PB
494 int shift = atomic_read(&timers_state.icount_time_shift);
495 return (count + (1 << shift) - 1) >> shift;
946fb27c
PB
496}
497
efab87cf 498static void icount_warp_rt(void)
946fb27c 499{
ccffff48
AB
500 unsigned seq;
501 int64_t warp_start;
502
17a15f1b
PB
503 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
504 * changes from -1 to another value, so the race here is okay.
505 */
ccffff48
AB
506 do {
507 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
b39e3f34 508 warp_start = timers_state.vm_clock_warp_start;
ccffff48
AB
509 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
510
511 if (warp_start == -1) {
946fb27c
PB
512 return;
513 }
514
94377115
PB
515 seqlock_write_lock(&timers_state.vm_clock_seqlock,
516 &timers_state.vm_clock_lock);
946fb27c 517 if (runstate_is_running()) {
74c0b816
PB
518 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
519 cpu_get_clock_locked());
8ed961d9
PB
520 int64_t warp_delta;
521
b39e3f34 522 warp_delta = clock - timers_state.vm_clock_warp_start;
8ed961d9 523 if (use_icount == 2) {
946fb27c 524 /*
40daca54 525 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
526 * far ahead of real time.
527 */
17a15f1b 528 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 529 int64_t delta = clock - cur_icount;
8ed961d9 530 warp_delta = MIN(warp_delta, delta);
946fb27c 531 }
c97595d1
EC
532 atomic_set_i64(&timers_state.qemu_icount_bias,
533 timers_state.qemu_icount_bias + warp_delta);
946fb27c 534 }
b39e3f34 535 timers_state.vm_clock_warp_start = -1;
94377115
PB
536 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
537 &timers_state.vm_clock_lock);
8ed961d9
PB
538
539 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
540 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
541 }
946fb27c
PB
542}
543
/*
 * Callback of icount_warp_timer: simply accounts the warp.  @opaque is
 * unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
551
8156be56
PB
552void qtest_clock_warp(int64_t dest)
553{
40daca54 554 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 555 AioContext *aio_context;
8156be56 556 assert(qtest_enabled());
efef88b3 557 aio_context = qemu_get_aio_context();
8156be56 558 while (clock < dest) {
dcb15780
PD
559 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
560 QEMU_TIMER_ATTR_ALL);
c9299e2f 561 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 562
94377115
PB
563 seqlock_write_lock(&timers_state.vm_clock_seqlock,
564 &timers_state.vm_clock_lock);
c97595d1
EC
565 atomic_set_i64(&timers_state.qemu_icount_bias,
566 timers_state.qemu_icount_bias + warp);
94377115
PB
567 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
568 &timers_state.vm_clock_lock);
17a15f1b 569
40daca54 570 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 571 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 572 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 573 }
40daca54 574 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
575}
576
e76d1798 577void qemu_start_warp_timer(void)
946fb27c 578{
ce78d18c 579 int64_t clock;
946fb27c
PB
580 int64_t deadline;
581
e76d1798 582 if (!use_icount) {
946fb27c
PB
583 return;
584 }
585
8bd7f71d
PD
586 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
587 * do not fire, so computing the deadline does not make sense.
588 */
589 if (!runstate_is_running()) {
590 return;
591 }
592
0c08185f
PD
593 if (replay_mode != REPLAY_MODE_PLAY) {
594 if (!all_cpu_threads_idle()) {
595 return;
596 }
8bd7f71d 597
0c08185f
PD
598 if (qtest_enabled()) {
599 /* When testing, qtest commands advance icount. */
600 return;
601 }
946fb27c 602
0c08185f
PD
603 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
604 } else {
605 /* warp clock deterministically in record/replay mode */
606 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
607 /* vCPU is sleeping and warp can't be started.
608 It is probably a race condition: notification sent
609 to vCPU was processed in advance and vCPU went to sleep.
610 Therefore we have to wake it up for doing someting. */
611 if (replay_has_checkpoint()) {
612 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
613 }
614 return;
615 }
8156be56
PB
616 }
617
ac70aafc 618 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 619 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
dcb15780
PD
620 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
621 ~QEMU_TIMER_ATTR_EXTERNAL);
ce78d18c 622 if (deadline < 0) {
d7a0f71d
VC
623 static bool notified;
624 if (!icount_sleep && !notified) {
3dc6f869 625 warn_report("icount sleep disabled and no active timers");
d7a0f71d
VC
626 notified = true;
627 }
ce78d18c 628 return;
ac70aafc
AB
629 }
630
946fb27c
PB
631 if (deadline > 0) {
632 /*
40daca54 633 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
634 * sleep. Otherwise, the CPU might be waiting for a future timer
635 * interrupt to wake it up, but the interrupt never comes because
636 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 637 * QEMU_CLOCK_VIRTUAL.
946fb27c 638 */
5045e9d9
VC
639 if (!icount_sleep) {
640 /*
641 * We never let VCPUs sleep in no sleep icount mode.
642 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
643 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
644 * It is useful when we want a deterministic execution time,
645 * isolated from host latencies.
646 */
94377115
PB
647 seqlock_write_lock(&timers_state.vm_clock_seqlock,
648 &timers_state.vm_clock_lock);
c97595d1
EC
649 atomic_set_i64(&timers_state.qemu_icount_bias,
650 timers_state.qemu_icount_bias + deadline);
94377115
PB
651 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
652 &timers_state.vm_clock_lock);
5045e9d9
VC
653 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
654 } else {
655 /*
656 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
657 * "real" time, (related to the time left until the next event) has
658 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
659 * This avoids that the warps are visible externally; for example,
660 * you will not be sending network packets continuously instead of
661 * every 100ms.
662 */
94377115
PB
663 seqlock_write_lock(&timers_state.vm_clock_seqlock,
664 &timers_state.vm_clock_lock);
b39e3f34
PD
665 if (timers_state.vm_clock_warp_start == -1
666 || timers_state.vm_clock_warp_start > clock) {
667 timers_state.vm_clock_warp_start = clock;
5045e9d9 668 }
94377115
PB
669 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
670 &timers_state.vm_clock_lock);
b39e3f34
PD
671 timer_mod_anticipate(timers_state.icount_warp_timer,
672 clock + deadline);
ce78d18c 673 }
ac70aafc 674 } else if (deadline == 0) {
40daca54 675 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
676 }
677}
678
e76d1798
PD
679static void qemu_account_warp_timer(void)
680{
681 if (!use_icount || !icount_sleep) {
682 return;
683 }
684
685 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
686 * do not fire, so computing the deadline does not make sense.
687 */
688 if (!runstate_is_running()) {
689 return;
690 }
691
692 /* warp clock deterministically in record/replay mode */
693 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
694 return;
695 }
696
b39e3f34 697 timer_del(timers_state.icount_warp_timer);
e76d1798
PD
698 icount_warp_rt();
699}
700
d09eae37
FK
701static bool icount_state_needed(void *opaque)
702{
703 return use_icount;
704}
705
b39e3f34
PD
706static bool warp_timer_state_needed(void *opaque)
707{
708 TimersState *s = opaque;
709 return s->icount_warp_timer != NULL;
710}
711
712static bool adjust_timers_state_needed(void *opaque)
713{
714 TimersState *s = opaque;
715 return s->icount_rt_timer != NULL;
716}
717
718/*
719 * Subsection for warp timer migration is optional, because may not be created
720 */
721static const VMStateDescription icount_vmstate_warp_timer = {
722 .name = "timer/icount/warp_timer",
723 .version_id = 1,
724 .minimum_version_id = 1,
725 .needed = warp_timer_state_needed,
726 .fields = (VMStateField[]) {
727 VMSTATE_INT64(vm_clock_warp_start, TimersState),
728 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
729 VMSTATE_END_OF_LIST()
730 }
731};
732
733static const VMStateDescription icount_vmstate_adjust_timers = {
734 .name = "timer/icount/timers",
735 .version_id = 1,
736 .minimum_version_id = 1,
737 .needed = adjust_timers_state_needed,
738 .fields = (VMStateField[]) {
739 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
740 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
741 VMSTATE_END_OF_LIST()
742 }
743};
744
d09eae37
FK
745/*
746 * This is a subsection for icount migration.
747 */
748static const VMStateDescription icount_vmstate_timers = {
749 .name = "timer/icount",
750 .version_id = 1,
751 .minimum_version_id = 1,
5cd8cada 752 .needed = icount_state_needed,
d09eae37
FK
753 .fields = (VMStateField[]) {
754 VMSTATE_INT64(qemu_icount_bias, TimersState),
755 VMSTATE_INT64(qemu_icount, TimersState),
756 VMSTATE_END_OF_LIST()
b39e3f34
PD
757 },
758 .subsections = (const VMStateDescription*[]) {
759 &icount_vmstate_warp_timer,
760 &icount_vmstate_adjust_timers,
761 NULL
d09eae37
FK
762 }
763};
764
946fb27c
PB
765static const VMStateDescription vmstate_timers = {
766 .name = "timer",
767 .version_id = 2,
768 .minimum_version_id = 1,
35d08458 769 .fields = (VMStateField[]) {
946fb27c 770 VMSTATE_INT64(cpu_ticks_offset, TimersState),
c1ff073c 771 VMSTATE_UNUSED(8),
946fb27c
PB
772 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
773 VMSTATE_END_OF_LIST()
d09eae37 774 },
5cd8cada
JQ
775 .subsections = (const VMStateDescription*[]) {
776 &icount_vmstate_timers,
777 NULL
946fb27c
PB
778 }
779};
780
14e6fe12 781static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
2adcc85d 782{
2adcc85d
JH
783 double pct;
784 double throttle_ratio;
785 long sleeptime_ns;
786
787 if (!cpu_throttle_get_percentage()) {
788 return;
789 }
790
791 pct = (double)cpu_throttle_get_percentage()/100;
792 throttle_ratio = pct / (1 - pct);
793 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
794
795 qemu_mutex_unlock_iothread();
2adcc85d
JH
796 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
797 qemu_mutex_lock_iothread();
90bb0c04 798 atomic_set(&cpu->throttle_thread_scheduled, 0);
2adcc85d
JH
799}
800
801static void cpu_throttle_timer_tick(void *opaque)
802{
803 CPUState *cpu;
804 double pct;
805
806 /* Stop the timer if needed */
807 if (!cpu_throttle_get_percentage()) {
808 return;
809 }
810 CPU_FOREACH(cpu) {
811 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
14e6fe12
PB
812 async_run_on_cpu(cpu, cpu_throttle_thread,
813 RUN_ON_CPU_NULL);
2adcc85d
JH
814 }
815 }
816
817 pct = (double)cpu_throttle_get_percentage()/100;
818 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
819 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
820}
821
822void cpu_throttle_set(int new_throttle_pct)
823{
824 /* Ensure throttle percentage is within valid range */
825 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
826 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
827
828 atomic_set(&throttle_percentage, new_throttle_pct);
829
830 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
831 CPU_THROTTLE_TIMESLICE_NS);
832}
833
834void cpu_throttle_stop(void)
835{
836 atomic_set(&throttle_percentage, 0);
837}
838
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int percentage = cpu_throttle_get_percentage();

    return percentage != 0;
}
843
844int cpu_throttle_get_percentage(void)
845{
846 return atomic_read(&throttle_percentage);
847}
848
4603ea01
PD
849void cpu_ticks_init(void)
850{
ccdb3c1f 851 seqlock_init(&timers_state.vm_clock_seqlock);
87a09cdc 852 qemu_spin_init(&timers_state.vm_clock_lock);
4603ea01 853 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
854 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
855 cpu_throttle_timer_tick, NULL);
4603ea01
PD
856}
857
1ad9580b 858void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 859{
1ad9580b 860 const char *option;
a8bfac37 861 char *rem_str = NULL;
1ad9580b 862
1ad9580b 863 option = qemu_opt_get(opts, "shift");
946fb27c 864 if (!option) {
a8bfac37
ST
865 if (qemu_opt_get(opts, "align") != NULL) {
866 error_setg(errp, "Please specify shift option when using align");
867 }
946fb27c
PB
868 return;
869 }
f1f4b57e
VC
870
871 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9 872 if (icount_sleep) {
b39e3f34 873 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
e76d1798 874 icount_timer_cb, NULL);
5045e9d9 875 }
f1f4b57e 876
a8bfac37 877 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
878
879 if (icount_align_option && !icount_sleep) {
778d9f9b 880 error_setg(errp, "align=on and sleep=off are incompatible");
f1f4b57e 881 }
946fb27c 882 if (strcmp(option, "auto") != 0) {
a8bfac37 883 errno = 0;
c1ff073c 884 timers_state.icount_time_shift = strtol(option, &rem_str, 0);
a8bfac37
ST
885 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
886 error_setg(errp, "icount: Invalid shift value");
887 }
946fb27c
PB
888 use_icount = 1;
889 return;
a8bfac37
ST
890 } else if (icount_align_option) {
891 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e 892 } else if (!icount_sleep) {
778d9f9b 893 error_setg(errp, "shift=auto and sleep=off are incompatible");
946fb27c
PB
894 }
895
896 use_icount = 2;
897
898 /* 125MIPS seems a reasonable initial guess at the guest speed.
899 It will be corrected fairly quickly anyway. */
c1ff073c 900 timers_state.icount_time_shift = 3;
946fb27c
PB
901
902 /* Have both realtime and virtual time triggers for speed adjustment.
903 The realtime trigger catches emulated time passing too slowly,
904 the virtual time trigger catches emulated time passing too fast.
905 Realtime triggers occur even when idle, so use them less frequently
906 than VM triggers. */
b39e3f34
PD
907 timers_state.vm_clock_warp_start = -1;
908 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
bf2a7ddb 909 icount_adjust_rt, NULL);
b39e3f34 910 timer_mod(timers_state.icount_rt_timer,
bf2a7ddb 911 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
b39e3f34 912 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
40daca54 913 icount_adjust_vm, NULL);
b39e3f34 914 timer_mod(timers_state.icount_vm_timer,
40daca54 915 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 916 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
917}
918
6546706d
AB
/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */
930
931static QEMUTimer *tcg_kick_vcpu_timer;
791158d9 932static CPUState *tcg_current_rr_cpu;
6546706d
AB
933
934#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
935
936static inline int64_t qemu_tcg_next_kick(void)
937{
938 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
939}
940
791158d9
AB
941/* Kick the currently round-robin scheduled vCPU */
942static void qemu_cpu_kick_rr_cpu(void)
943{
944 CPUState *cpu;
791158d9
AB
945 do {
946 cpu = atomic_mb_read(&tcg_current_rr_cpu);
947 if (cpu) {
948 cpu_exit(cpu);
949 }
950 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
951}
952
6b8f0187
PB
953static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
954{
955}
956
3f53bc61
PB
957void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
958{
6b8f0187
PB
959 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
960 qemu_notify_event();
961 return;
962 }
963
c52e7132
PM
964 if (qemu_in_vcpu_thread()) {
965 /* A CPU is currently running; kick it back out to the
966 * tcg_cpu_exec() loop so it will recalculate its
967 * icount deadline immediately.
968 */
969 qemu_cpu_kick(current_cpu);
970 } else if (first_cpu) {
6b8f0187
PB
971 /* qemu_cpu_kick is not enough to kick a halted CPU out of
972 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
973 * causes cpu_thread_is_idle to return false. This way,
974 * handle_icount_deadline can run.
c52e7132
PM
975 * If we have no CPUs at all for some reason, we don't
976 * need to do anything.
6b8f0187
PB
977 */
978 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
979 }
3f53bc61
PB
980}
981
6546706d
AB
982static void kick_tcg_thread(void *opaque)
983{
984 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
791158d9 985 qemu_cpu_kick_rr_cpu();
6546706d
AB
986}
987
988static void start_tcg_kick_timer(void)
989{
db08b687
PB
990 assert(!mttcg_enabled);
991 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
6546706d
AB
992 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
993 kick_tcg_thread, NULL);
1926ab27
AB
994 }
995 if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
6546706d
AB
996 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
997 }
998}
999
1000static void stop_tcg_kick_timer(void)
1001{
db08b687 1002 assert(!mttcg_enabled);
1926ab27 1003 if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
6546706d 1004 timer_del(tcg_kick_vcpu_timer);
6546706d
AB
1005 }
1006}
1007
296af7c9
BS
1008/***********************************************************/
1009void hw_error(const char *fmt, ...)
1010{
1011 va_list ap;
55e5c285 1012 CPUState *cpu;
296af7c9
BS
1013
1014 va_start(ap, fmt);
1015 fprintf(stderr, "qemu: hardware error: ");
1016 vfprintf(stderr, fmt, ap);
1017 fprintf(stderr, "\n");
bdc44640 1018 CPU_FOREACH(cpu) {
55e5c285 1019 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
90c84c56 1020 cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
296af7c9
BS
1021 }
1022 va_end(ap);
1023 abort();
1024}
1025
1026void cpu_synchronize_all_states(void)
1027{
182735ef 1028 CPUState *cpu;
296af7c9 1029
bdc44640 1030 CPU_FOREACH(cpu) {
182735ef 1031 cpu_synchronize_state(cpu);
c97d6d2c
SAGDR
1032 /* TODO: move to cpu_synchronize_state() */
1033 if (hvf_enabled()) {
1034 hvf_cpu_synchronize_state(cpu);
1035 }
296af7c9
BS
1036 }
1037}
1038
1039void cpu_synchronize_all_post_reset(void)
1040{
182735ef 1041 CPUState *cpu;
296af7c9 1042
bdc44640 1043 CPU_FOREACH(cpu) {
182735ef 1044 cpu_synchronize_post_reset(cpu);
c97d6d2c
SAGDR
1045 /* TODO: move to cpu_synchronize_post_reset() */
1046 if (hvf_enabled()) {
1047 hvf_cpu_synchronize_post_reset(cpu);
1048 }
296af7c9
BS
1049 }
1050}
1051
1052void cpu_synchronize_all_post_init(void)
1053{
182735ef 1054 CPUState *cpu;
296af7c9 1055
bdc44640 1056 CPU_FOREACH(cpu) {
182735ef 1057 cpu_synchronize_post_init(cpu);
c97d6d2c
SAGDR
1058 /* TODO: move to cpu_synchronize_post_init() */
1059 if (hvf_enabled()) {
1060 hvf_cpu_synchronize_post_init(cpu);
1061 }
296af7c9
BS
1062 }
1063}
1064
75e972da
DG
1065void cpu_synchronize_all_pre_loadvm(void)
1066{
1067 CPUState *cpu;
1068
1069 CPU_FOREACH(cpu) {
1070 cpu_synchronize_pre_loadvm(cpu);
1071 }
1072}
1073
4486e89c 1074static int do_vm_stop(RunState state, bool send_stop)
296af7c9 1075{
56983463
KW
1076 int ret = 0;
1077
1354869c 1078 if (runstate_is_running()) {
296af7c9 1079 cpu_disable_ticks();
296af7c9 1080 pause_all_vcpus();
f5bbfba1 1081 runstate_set(state);
1dfb4dd9 1082 vm_state_notify(0, state);
4486e89c 1083 if (send_stop) {
3ab72385 1084 qapi_event_send_stop();
4486e89c 1085 }
296af7c9 1086 }
56983463 1087
594a45ce 1088 bdrv_drain_all();
6d0ceb80 1089 replay_disable_events();
22af08ea 1090 ret = bdrv_flush_all();
594a45ce 1091
56983463 1092 return ret;
296af7c9
BS
1093}
1094
4486e89c
SH
1095/* Special vm_stop() variant for terminating the process. Historically clients
1096 * did not expect a QMP STOP event and so we need to retain compatibility.
1097 */
1098int vm_shutdown(void)
1099{
1100 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1101}
1102
a1fcaa73 1103static bool cpu_can_run(CPUState *cpu)
296af7c9 1104{
4fdeee7c 1105 if (cpu->stop) {
a1fcaa73 1106 return false;
0ab07c62 1107 }
321bc0b2 1108 if (cpu_is_stopped(cpu)) {
a1fcaa73 1109 return false;
0ab07c62 1110 }
a1fcaa73 1111 return true;
296af7c9
BS
1112}
1113
91325046 1114static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 1115{
64f6b346 1116 gdb_set_stop_cpu(cpu);
8cf71710 1117 qemu_system_debug_request();
f324e766 1118 cpu->stopped = true;
3c638d06
JK
1119}
1120
6d9cb73c
JK
1121#ifdef CONFIG_LINUX
1122static void sigbus_reraise(void)
1123{
1124 sigset_t set;
1125 struct sigaction action;
1126
1127 memset(&action, 0, sizeof(action));
1128 action.sa_handler = SIG_DFL;
1129 if (!sigaction(SIGBUS, &action, NULL)) {
1130 raise(SIGBUS);
1131 sigemptyset(&set);
1132 sigaddset(&set, SIGBUS);
a2d1761d 1133 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
6d9cb73c
JK
1134 }
1135 perror("Failed to re-raise SIGBUS!\n");
1136 abort();
1137}
1138
d98d4072 1139static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
6d9cb73c 1140{
a16fc07e
PB
1141 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1142 sigbus_reraise();
1143 }
1144
2ae41db2
PB
1145 if (current_cpu) {
1146 /* Called asynchronously in VCPU thread. */
1147 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1148 sigbus_reraise();
1149 }
1150 } else {
1151 /* Called synchronously (via signalfd) in main thread. */
1152 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1153 sigbus_reraise();
1154 }
6d9cb73c
JK
1155 }
1156}
1157
1158static void qemu_init_sigbus(void)
1159{
1160 struct sigaction action;
1161
1162 memset(&action, 0, sizeof(action));
1163 action.sa_flags = SA_SIGINFO;
d98d4072 1164 action.sa_sigaction = sigbus_handler;
6d9cb73c
JK
1165 sigaction(SIGBUS, &action, NULL);
1166
1167 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1168}
6d9cb73c 1169#else /* !CONFIG_LINUX */
6d9cb73c
JK
/* No SIGBUS setup is done when CONFIG_LINUX is not defined. */
static void qemu_init_sigbus(void)
{
    /* Nothing to do. */
}
a16fc07e 1173#endif /* !CONFIG_LINUX */
ff48eb5f 1174
b2532d88 1175static QemuMutex qemu_global_mutex;
296af7c9
BS
1176
1177static QemuThread io_thread;
1178
296af7c9
BS
1179/* cpu creation */
1180static QemuCond qemu_cpu_cond;
1181/* system init */
296af7c9
BS
1182static QemuCond qemu_pause_cond;
1183
d3b12f5d 1184void qemu_init_cpu_loop(void)
296af7c9 1185{
6d9cb73c 1186 qemu_init_sigbus();
ed94592b 1187 qemu_cond_init(&qemu_cpu_cond);
ed94592b 1188 qemu_cond_init(&qemu_pause_cond);
296af7c9 1189 qemu_mutex_init(&qemu_global_mutex);
296af7c9 1190
b7680cb6 1191 qemu_thread_get_self(&io_thread);
296af7c9
BS
1192}
1193
14e6fe12 1194void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
e82bcec2 1195{
d148d90e 1196 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
3c02270d
CV
1197}
1198
4c055ab5
GZ
1199static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1200{
1201 if (kvm_destroy_vcpu(cpu) < 0) {
1202 error_report("kvm_destroy_vcpu failed");
1203 exit(EXIT_FAILURE);
1204 }
1205}
1206
1207static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1208{
1209}
1210
ebd05fea
DH
1211static void qemu_cpu_stop(CPUState *cpu, bool exit)
1212{
1213 g_assert(qemu_cpu_is_self(cpu));
1214 cpu->stop = false;
1215 cpu->stopped = true;
1216 if (exit) {
1217 cpu_exit(cpu);
1218 }
1219 qemu_cond_broadcast(&qemu_pause_cond);
1220}
1221
509a0d78 1222static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 1223{
37257942 1224 atomic_mb_set(&cpu->thread_kicked, false);
4fdeee7c 1225 if (cpu->stop) {
ebd05fea 1226 qemu_cpu_stop(cpu, false);
296af7c9 1227 }
a5403c69 1228 process_queued_cpu_work(cpu);
37257942
AB
1229}
1230
a8efa606 1231static void qemu_tcg_rr_wait_io_event(void)
37257942 1232{
a8efa606
PB
1233 CPUState *cpu;
1234
db08b687 1235 while (all_cpu_threads_idle()) {
6546706d 1236 stop_tcg_kick_timer();
a8efa606 1237 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
16400322 1238 }
296af7c9 1239
6546706d
AB
1240 start_tcg_kick_timer();
1241
a8efa606
PB
1242 CPU_FOREACH(cpu) {
1243 qemu_wait_io_event_common(cpu);
1244 }
296af7c9
BS
1245}
1246
db08b687 1247static void qemu_wait_io_event(CPUState *cpu)
296af7c9 1248{
a98ae1d8 1249 while (cpu_thread_is_idle(cpu)) {
f5c121b8 1250 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1251 }
296af7c9 1252
db08b687
PB
1253#ifdef _WIN32
1254 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1255 if (!tcg_enabled()) {
1256 SleepEx(0, TRUE);
c97d6d2c 1257 }
db08b687 1258#endif
c97d6d2c
SAGDR
1259 qemu_wait_io_event_common(cpu);
1260}
1261
7e97cd88 1262static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 1263{
48a106bd 1264 CPUState *cpu = arg;
84b4915d 1265 int r;
296af7c9 1266
ab28bd23
PB
1267 rcu_register_thread();
1268
2e7f7a3c 1269 qemu_mutex_lock_iothread();
814e612e 1270 qemu_thread_get_self(cpu->thread);
9f09e18a 1271 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1272 cpu->can_do_io = 1;
4917cf44 1273 current_cpu = cpu;
296af7c9 1274
504134d2 1275 r = kvm_init_vcpu(cpu);
84b4915d 1276 if (r < 0) {
493d89bf 1277 error_report("kvm_init_vcpu failed: %s", strerror(-r));
84b4915d
JK
1278 exit(1);
1279 }
296af7c9 1280
18268b60 1281 kvm_init_cpu_signals(cpu);
296af7c9
BS
1282
1283 /* signal CPU creation */
61a46217 1284 cpu->created = true;
296af7c9 1285 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1286 qemu_guest_random_seed_thread_part2(cpu->random_seed);
296af7c9 1287
4c055ab5 1288 do {
a1fcaa73 1289 if (cpu_can_run(cpu)) {
1458c363 1290 r = kvm_cpu_exec(cpu);
83f338f7 1291 if (r == EXCP_DEBUG) {
91325046 1292 cpu_handle_guest_debug(cpu);
83f338f7 1293 }
0ab07c62 1294 }
db08b687 1295 qemu_wait_io_event(cpu);
4c055ab5 1296 } while (!cpu->unplug || cpu_can_run(cpu));
296af7c9 1297
4c055ab5 1298 qemu_kvm_destroy_vcpu(cpu);
2c579042
BR
1299 cpu->created = false;
1300 qemu_cond_signal(&qemu_cpu_cond);
4c055ab5 1301 qemu_mutex_unlock_iothread();
57615ed5 1302 rcu_unregister_thread();
296af7c9
BS
1303 return NULL;
1304}
1305
c7f0f3b1
AL
1306static void *qemu_dummy_cpu_thread_fn(void *arg)
1307{
1308#ifdef _WIN32
493d89bf 1309 error_report("qtest is not supported under Windows");
c7f0f3b1
AL
1310 exit(1);
1311#else
10a9021d 1312 CPUState *cpu = arg;
c7f0f3b1
AL
1313 sigset_t waitset;
1314 int r;
1315
ab28bd23
PB
1316 rcu_register_thread();
1317
c7f0f3b1 1318 qemu_mutex_lock_iothread();
814e612e 1319 qemu_thread_get_self(cpu->thread);
9f09e18a 1320 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1321 cpu->can_do_io = 1;
37257942 1322 current_cpu = cpu;
c7f0f3b1
AL
1323
1324 sigemptyset(&waitset);
1325 sigaddset(&waitset, SIG_IPI);
1326
1327 /* signal CPU creation */
61a46217 1328 cpu->created = true;
c7f0f3b1 1329 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1330 qemu_guest_random_seed_thread_part2(cpu->random_seed);
c7f0f3b1 1331
d2831ab0 1332 do {
c7f0f3b1
AL
1333 qemu_mutex_unlock_iothread();
1334 do {
1335 int sig;
1336 r = sigwait(&waitset, &sig);
1337 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1338 if (r == -1) {
1339 perror("sigwait");
1340 exit(1);
1341 }
1342 qemu_mutex_lock_iothread();
db08b687 1343 qemu_wait_io_event(cpu);
d2831ab0 1344 } while (!cpu->unplug);
c7f0f3b1 1345
d40bfcbb 1346 qemu_mutex_unlock_iothread();
d2831ab0 1347 rcu_unregister_thread();
c7f0f3b1
AL
1348 return NULL;
1349#endif
1350}
1351
1be7fcb8
AB
1352static int64_t tcg_get_icount_limit(void)
1353{
1354 int64_t deadline;
1355
1356 if (replay_mode != REPLAY_MODE_PLAY) {
dcb15780
PD
1357 /*
1358 * Include all the timers, because they may need an attention.
1359 * Too long CPU execution may create unnecessary delay in UI.
1360 */
1361 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1362 QEMU_TIMER_ATTR_ALL);
1be7fcb8
AB
1363
1364 /* Maintain prior (possibly buggy) behaviour where if no deadline
1365 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1366 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1367 * nanoseconds.
1368 */
1369 if ((deadline < 0) || (deadline > INT32_MAX)) {
1370 deadline = INT32_MAX;
1371 }
1372
1373 return qemu_icount_round(deadline);
1374 } else {
1375 return replay_get_instructions();
1376 }
1377}
1378
12e9700d
AB
1379static void handle_icount_deadline(void)
1380{
6b8f0187 1381 assert(qemu_in_vcpu_thread());
12e9700d 1382 if (use_icount) {
dcb15780
PD
1383 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1384 QEMU_TIMER_ATTR_ALL);
12e9700d
AB
1385
1386 if (deadline == 0) {
6b8f0187 1387 /* Wake up other AioContexts. */
12e9700d 1388 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
6b8f0187 1389 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
12e9700d
AB
1390 }
1391 }
1392}
1393
05248382 1394static void prepare_icount_for_run(CPUState *cpu)
1be7fcb8 1395{
1be7fcb8 1396 if (use_icount) {
eda5f7c6 1397 int insns_left;
05248382
AB
1398
1399 /* These should always be cleared by process_icount_data after
1400 * each vCPU execution. However u16.high can be raised
1401 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1402 */
5e140196 1403 g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
05248382
AB
1404 g_assert(cpu->icount_extra == 0);
1405
eda5f7c6
AB
1406 cpu->icount_budget = tcg_get_icount_limit();
1407 insns_left = MIN(0xffff, cpu->icount_budget);
5e140196 1408 cpu_neg(cpu)->icount_decr.u16.low = insns_left;
eda5f7c6 1409 cpu->icount_extra = cpu->icount_budget - insns_left;
d759c951
AB
1410
1411 replay_mutex_lock();
1be7fcb8 1412 }
05248382
AB
1413}
1414
1415static void process_icount_data(CPUState *cpu)
1416{
1be7fcb8 1417 if (use_icount) {
e4cd9657 1418 /* Account for executed instructions */
512d3c80 1419 cpu_update_icount(cpu);
05248382
AB
1420
1421 /* Reset the counters */
5e140196 1422 cpu_neg(cpu)->icount_decr.u16.low = 0;
1be7fcb8 1423 cpu->icount_extra = 0;
e4cd9657
AB
1424 cpu->icount_budget = 0;
1425
1be7fcb8 1426 replay_account_executed_instructions();
d759c951
AB
1427
1428 replay_mutex_unlock();
1be7fcb8 1429 }
05248382
AB
1430}
1431
1432
1433static int tcg_cpu_exec(CPUState *cpu)
1434{
1435 int ret;
1436#ifdef CONFIG_PROFILER
1437 int64_t ti;
1438#endif
1439
f28d0dfd 1440 assert(tcg_enabled());
05248382
AB
1441#ifdef CONFIG_PROFILER
1442 ti = profile_getclock();
1443#endif
05248382
AB
1444 cpu_exec_start(cpu);
1445 ret = cpu_exec(cpu);
1446 cpu_exec_end(cpu);
05248382 1447#ifdef CONFIG_PROFILER
72fd2efb
EC
1448 atomic_set(&tcg_ctx->prof.cpu_exec_time,
1449 tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
05248382 1450#endif
1be7fcb8
AB
1451 return ret;
1452}
1453
c93bbbef
AB
1454/* Destroy any remaining vCPUs which have been unplugged and have
1455 * finished running
1456 */
1457static void deal_with_unplugged_cpus(void)
1be7fcb8 1458{
c93bbbef 1459 CPUState *cpu;
1be7fcb8 1460
c93bbbef
AB
1461 CPU_FOREACH(cpu) {
1462 if (cpu->unplug && !cpu_can_run(cpu)) {
1463 qemu_tcg_destroy_vcpu(cpu);
1464 cpu->created = false;
1465 qemu_cond_signal(&qemu_cpu_cond);
1be7fcb8
AB
1466 break;
1467 }
1468 }
1be7fcb8 1469}
bdb7ca67 1470
6546706d
AB
1471/* Single-threaded TCG
1472 *
1473 * In the single-threaded case each vCPU is simulated in turn. If
1474 * there is more than a single vCPU we create a simple timer to kick
1475 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1476 * This is done explicitly rather than relying on side-effects
1477 * elsewhere.
1478 */
1479
37257942 1480static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
296af7c9 1481{
c3586ba7 1482 CPUState *cpu = arg;
296af7c9 1483
f28d0dfd 1484 assert(tcg_enabled());
ab28bd23 1485 rcu_register_thread();
3468b59e 1486 tcg_register_thread();
ab28bd23 1487
2e7f7a3c 1488 qemu_mutex_lock_iothread();
814e612e 1489 qemu_thread_get_self(cpu->thread);
296af7c9 1490
5a9c973b
DH
1491 cpu->thread_id = qemu_get_thread_id();
1492 cpu->created = true;
1493 cpu->can_do_io = 1;
296af7c9 1494 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1495 qemu_guest_random_seed_thread_part2(cpu->random_seed);
296af7c9 1496
fa7d1867 1497 /* wait for initial kick-off after machine start */
c28e399c 1498 while (first_cpu->stopped) {
d5f8d613 1499 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1500
1501 /* process any pending work */
bdc44640 1502 CPU_FOREACH(cpu) {
37257942 1503 current_cpu = cpu;
182735ef 1504 qemu_wait_io_event_common(cpu);
8e564b4e 1505 }
0ab07c62 1506 }
296af7c9 1507
6546706d
AB
1508 start_tcg_kick_timer();
1509
c93bbbef
AB
1510 cpu = first_cpu;
1511
e5143e30
AB
1512 /* process any pending work */
1513 cpu->exit_request = 1;
1514
296af7c9 1515 while (1) {
d759c951
AB
1516 qemu_mutex_unlock_iothread();
1517 replay_mutex_lock();
1518 qemu_mutex_lock_iothread();
c93bbbef
AB
1519 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1520 qemu_account_warp_timer();
1521
6b8f0187
PB
1522 /* Run the timers here. This is much more efficient than
1523 * waking up the I/O thread and waiting for completion.
1524 */
1525 handle_icount_deadline();
1526
d759c951
AB
1527 replay_mutex_unlock();
1528
c93bbbef
AB
1529 if (!cpu) {
1530 cpu = first_cpu;
1531 }
1532
e5143e30
AB
1533 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1534
791158d9 1535 atomic_mb_set(&tcg_current_rr_cpu, cpu);
37257942 1536 current_cpu = cpu;
c93bbbef
AB
1537
1538 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1539 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1540
1541 if (cpu_can_run(cpu)) {
1542 int r;
05248382 1543
d759c951 1544 qemu_mutex_unlock_iothread();
05248382
AB
1545 prepare_icount_for_run(cpu);
1546
c93bbbef 1547 r = tcg_cpu_exec(cpu);
05248382
AB
1548
1549 process_icount_data(cpu);
d759c951 1550 qemu_mutex_lock_iothread();
05248382 1551
c93bbbef
AB
1552 if (r == EXCP_DEBUG) {
1553 cpu_handle_guest_debug(cpu);
1554 break;
08e73c48
PK
1555 } else if (r == EXCP_ATOMIC) {
1556 qemu_mutex_unlock_iothread();
1557 cpu_exec_step_atomic(cpu);
1558 qemu_mutex_lock_iothread();
1559 break;
c93bbbef 1560 }
37257942 1561 } else if (cpu->stop) {
c93bbbef
AB
1562 if (cpu->unplug) {
1563 cpu = CPU_NEXT(cpu);
1564 }
1565 break;
1566 }
1567
e5143e30
AB
1568 cpu = CPU_NEXT(cpu);
1569 } /* while (cpu && !cpu->exit_request).. */
1570
791158d9
AB
1571 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1572 atomic_set(&tcg_current_rr_cpu, NULL);
c93bbbef 1573
e5143e30
AB
1574 if (cpu && cpu->exit_request) {
1575 atomic_mb_set(&cpu->exit_request, 0);
1576 }
ac70aafc 1577
013aabdc
CD
1578 if (use_icount && all_cpu_threads_idle()) {
1579 /*
1580 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1581 * in the main_loop, wake it up in order to start the warp timer.
1582 */
1583 qemu_notify_event();
1584 }
1585
a8efa606 1586 qemu_tcg_rr_wait_io_event();
c93bbbef 1587 deal_with_unplugged_cpus();
296af7c9
BS
1588 }
1589
9b0605f9 1590 rcu_unregister_thread();
296af7c9
BS
1591 return NULL;
1592}
1593
b0cb0a66
VP
1594static void *qemu_hax_cpu_thread_fn(void *arg)
1595{
1596 CPUState *cpu = arg;
1597 int r;
b3d3a426 1598
9857c2d2 1599 rcu_register_thread();
b3d3a426 1600 qemu_mutex_lock_iothread();
b0cb0a66 1601 qemu_thread_get_self(cpu->thread);
b0cb0a66
VP
1602
1603 cpu->thread_id = qemu_get_thread_id();
1604 cpu->created = true;
b0cb0a66
VP
1605 current_cpu = cpu;
1606
1607 hax_init_vcpu(cpu);
1608 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1609 qemu_guest_random_seed_thread_part2(cpu->random_seed);
b0cb0a66 1610
9857c2d2 1611 do {
b0cb0a66
VP
1612 if (cpu_can_run(cpu)) {
1613 r = hax_smp_cpu_exec(cpu);
1614 if (r == EXCP_DEBUG) {
1615 cpu_handle_guest_debug(cpu);
1616 }
1617 }
1618
db08b687 1619 qemu_wait_io_event(cpu);
9857c2d2
PB
1620 } while (!cpu->unplug || cpu_can_run(cpu));
1621 rcu_unregister_thread();
b0cb0a66
VP
1622 return NULL;
1623}
1624
c97d6d2c
SAGDR
1625/* The HVF-specific vCPU thread function. This one should only run when the host
1626 * CPU supports the VMX "unrestricted guest" feature. */
1627static void *qemu_hvf_cpu_thread_fn(void *arg)
1628{
1629 CPUState *cpu = arg;
1630
1631 int r;
1632
1633 assert(hvf_enabled());
1634
1635 rcu_register_thread();
1636
1637 qemu_mutex_lock_iothread();
1638 qemu_thread_get_self(cpu->thread);
1639
1640 cpu->thread_id = qemu_get_thread_id();
1641 cpu->can_do_io = 1;
1642 current_cpu = cpu;
1643
1644 hvf_init_vcpu(cpu);
1645
1646 /* signal CPU creation */
1647 cpu->created = true;
1648 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1649 qemu_guest_random_seed_thread_part2(cpu->random_seed);
c97d6d2c
SAGDR
1650
1651 do {
1652 if (cpu_can_run(cpu)) {
1653 r = hvf_vcpu_exec(cpu);
1654 if (r == EXCP_DEBUG) {
1655 cpu_handle_guest_debug(cpu);
1656 }
1657 }
db08b687 1658 qemu_wait_io_event(cpu);
c97d6d2c
SAGDR
1659 } while (!cpu->unplug || cpu_can_run(cpu));
1660
1661 hvf_vcpu_destroy(cpu);
1662 cpu->created = false;
1663 qemu_cond_signal(&qemu_cpu_cond);
1664 qemu_mutex_unlock_iothread();
8178e637 1665 rcu_unregister_thread();
c97d6d2c
SAGDR
1666 return NULL;
1667}
1668
19306806
JTV
1669static void *qemu_whpx_cpu_thread_fn(void *arg)
1670{
1671 CPUState *cpu = arg;
1672 int r;
1673
1674 rcu_register_thread();
1675
1676 qemu_mutex_lock_iothread();
1677 qemu_thread_get_self(cpu->thread);
1678 cpu->thread_id = qemu_get_thread_id();
1679 current_cpu = cpu;
1680
1681 r = whpx_init_vcpu(cpu);
1682 if (r < 0) {
1683 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1684 exit(1);
1685 }
1686
1687 /* signal CPU creation */
1688 cpu->created = true;
1689 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1690 qemu_guest_random_seed_thread_part2(cpu->random_seed);
19306806
JTV
1691
1692 do {
1693 if (cpu_can_run(cpu)) {
1694 r = whpx_vcpu_exec(cpu);
1695 if (r == EXCP_DEBUG) {
1696 cpu_handle_guest_debug(cpu);
1697 }
1698 }
1699 while (cpu_thread_is_idle(cpu)) {
1700 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1701 }
1702 qemu_wait_io_event_common(cpu);
1703 } while (!cpu->unplug || cpu_can_run(cpu));
1704
1705 whpx_destroy_vcpu(cpu);
1706 cpu->created = false;
1707 qemu_cond_signal(&qemu_cpu_cond);
1708 qemu_mutex_unlock_iothread();
1709 rcu_unregister_thread();
c97d6d2c
SAGDR
1710 return NULL;
1711}
1712
b0cb0a66
VP
1713#ifdef _WIN32
1714static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1715{
1716}
1717#endif
1718
37257942
AB
1719/* Multi-threaded TCG
1720 *
1721 * In the multi-threaded case each vCPU has its own thread. The TLS
1722 * variable current_cpu can be used deep in the code to find the
1723 * current CPUState for a given thread.
1724 */
1725
1726static void *qemu_tcg_cpu_thread_fn(void *arg)
1727{
1728 CPUState *cpu = arg;
1729
f28d0dfd 1730 assert(tcg_enabled());
bf51c720
AB
1731 g_assert(!use_icount);
1732
37257942 1733 rcu_register_thread();
3468b59e 1734 tcg_register_thread();
37257942
AB
1735
1736 qemu_mutex_lock_iothread();
1737 qemu_thread_get_self(cpu->thread);
1738
1739 cpu->thread_id = qemu_get_thread_id();
1740 cpu->created = true;
1741 cpu->can_do_io = 1;
1742 current_cpu = cpu;
1743 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1744 qemu_guest_random_seed_thread_part2(cpu->random_seed);
37257942
AB
1745
1746 /* process any pending work */
1747 cpu->exit_request = 1;
1748
54961aac 1749 do {
37257942
AB
1750 if (cpu_can_run(cpu)) {
1751 int r;
d759c951 1752 qemu_mutex_unlock_iothread();
37257942 1753 r = tcg_cpu_exec(cpu);
d759c951 1754 qemu_mutex_lock_iothread();
37257942
AB
1755 switch (r) {
1756 case EXCP_DEBUG:
1757 cpu_handle_guest_debug(cpu);
1758 break;
1759 case EXCP_HALTED:
1760 /* during start-up the vCPU is reset and the thread is
1761 * kicked several times. If we don't ensure we go back
1762 * to sleep in the halted state we won't cleanly
1763 * start-up when the vCPU is enabled.
1764 *
1765 * cpu->halted should ensure we sleep in wait_io_event
1766 */
1767 g_assert(cpu->halted);
1768 break;
08e73c48
PK
1769 case EXCP_ATOMIC:
1770 qemu_mutex_unlock_iothread();
1771 cpu_exec_step_atomic(cpu);
1772 qemu_mutex_lock_iothread();
37257942
AB
1773 default:
1774 /* Ignore everything else? */
1775 break;
1776 }
1777 }
1778
37257942 1779 atomic_mb_set(&cpu->exit_request, 0);
db08b687 1780 qemu_wait_io_event(cpu);
9b0605f9 1781 } while (!cpu->unplug || cpu_can_run(cpu));
37257942 1782
9b0605f9
PB
1783 qemu_tcg_destroy_vcpu(cpu);
1784 cpu->created = false;
1785 qemu_cond_signal(&qemu_cpu_cond);
1786 qemu_mutex_unlock_iothread();
1787 rcu_unregister_thread();
37257942
AB
1788 return NULL;
1789}
1790
2ff09a40 1791static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1792{
1793#ifndef _WIN32
1794 int err;
1795
e0c38211
PB
1796 if (cpu->thread_kicked) {
1797 return;
9102deda 1798 }
e0c38211 1799 cpu->thread_kicked = true;
814e612e 1800 err = pthread_kill(cpu->thread->thread, SIG_IPI);
d455ebc4 1801 if (err && err != ESRCH) {
cc015e9a
PB
1802 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1803 exit(1);
1804 }
1805#else /* _WIN32 */
b0cb0a66 1806 if (!qemu_cpu_is_self(cpu)) {
19306806
JTV
1807 if (whpx_enabled()) {
1808 whpx_vcpu_kick(cpu);
1809 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
b0cb0a66
VP
1810 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1811 __func__, GetLastError());
1812 exit(1);
1813 }
1814 }
e0c38211
PB
1815#endif
1816}
ed9164a3 1817
c08d7424 1818void qemu_cpu_kick(CPUState *cpu)
296af7c9 1819{
f5c121b8 1820 qemu_cond_broadcast(cpu->halt_cond);
e0c38211 1821 if (tcg_enabled()) {
791158d9 1822 cpu_exit(cpu);
37257942 1823 /* NOP unless doing single-thread RR */
791158d9 1824 qemu_cpu_kick_rr_cpu();
e0c38211 1825 } else {
b0cb0a66
VP
1826 if (hax_enabled()) {
1827 /*
1828 * FIXME: race condition with the exit_request check in
1829 * hax_vcpu_hax_exec
1830 */
1831 cpu->exit_request = 1;
1832 }
e0c38211
PB
1833 qemu_cpu_kick_thread(cpu);
1834 }
296af7c9
BS
1835}
1836
46d62fac 1837void qemu_cpu_kick_self(void)
296af7c9 1838{
4917cf44 1839 assert(current_cpu);
9102deda 1840 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1841}
1842
60e82579 1843bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1844{
814e612e 1845 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1846}
1847
79e2b9ae 1848bool qemu_in_vcpu_thread(void)
aa723c23 1849{
4917cf44 1850 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1851}
1852
afbe7053
PB
1853static __thread bool iothread_locked = false;
1854
1855bool qemu_mutex_iothread_locked(void)
1856{
1857 return iothread_locked;
1858}
1859
cb764d06
EC
1860/*
1861 * The BQL is taken from so many places that it is worth profiling the
1862 * callers directly, instead of funneling them all through a single function.
1863 */
1864void qemu_mutex_lock_iothread_impl(const char *file, int line)
296af7c9 1865{
cb764d06
EC
1866 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1867
8d04fb55 1868 g_assert(!qemu_mutex_iothread_locked());
cb764d06 1869 bql_lock(&qemu_global_mutex, file, line);
afbe7053 1870 iothread_locked = true;
296af7c9
BS
1871}
1872
1873void qemu_mutex_unlock_iothread(void)
1874{
8d04fb55 1875 g_assert(qemu_mutex_iothread_locked());
afbe7053 1876 iothread_locked = false;
296af7c9
BS
1877 qemu_mutex_unlock(&qemu_global_mutex);
1878}
1879
e8faee06 1880static bool all_vcpus_paused(void)
296af7c9 1881{
bdc44640 1882 CPUState *cpu;
296af7c9 1883
bdc44640 1884 CPU_FOREACH(cpu) {
182735ef 1885 if (!cpu->stopped) {
e8faee06 1886 return false;
0ab07c62 1887 }
296af7c9
BS
1888 }
1889
e8faee06 1890 return true;
296af7c9
BS
1891}
1892
1893void pause_all_vcpus(void)
1894{
bdc44640 1895 CPUState *cpu;
296af7c9 1896
40daca54 1897 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1898 CPU_FOREACH(cpu) {
ebd05fea
DH
1899 if (qemu_cpu_is_self(cpu)) {
1900 qemu_cpu_stop(cpu, true);
1901 } else {
1902 cpu->stop = true;
1903 qemu_cpu_kick(cpu);
1904 }
d798e974
JK
1905 }
1906
d759c951
AB
1907 /* We need to drop the replay_lock so any vCPU threads woken up
1908 * can finish their replay tasks
1909 */
1910 replay_mutex_unlock();
1911
296af7c9 1912 while (!all_vcpus_paused()) {
be7d6c57 1913 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1914 CPU_FOREACH(cpu) {
182735ef 1915 qemu_cpu_kick(cpu);
296af7c9
BS
1916 }
1917 }
d759c951
AB
1918
1919 qemu_mutex_unlock_iothread();
1920 replay_mutex_lock();
1921 qemu_mutex_lock_iothread();
296af7c9
BS
1922}
1923
2993683b
IM
1924void cpu_resume(CPUState *cpu)
1925{
1926 cpu->stop = false;
1927 cpu->stopped = false;
1928 qemu_cpu_kick(cpu);
1929}
1930
296af7c9
BS
1931void resume_all_vcpus(void)
1932{
bdc44640 1933 CPUState *cpu;
296af7c9 1934
40daca54 1935 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1936 CPU_FOREACH(cpu) {
182735ef 1937 cpu_resume(cpu);
296af7c9
BS
1938 }
1939}
1940
dbadee4f 1941void cpu_remove_sync(CPUState *cpu)
4c055ab5
GZ
1942{
1943 cpu->stop = true;
1944 cpu->unplug = true;
1945 qemu_cpu_kick(cpu);
dbadee4f
PB
1946 qemu_mutex_unlock_iothread();
1947 qemu_thread_join(cpu->thread);
1948 qemu_mutex_lock_iothread();
2c579042
BR
1949}
1950
4900116e
DDAG
1951/* For temporary buffers for forming a name */
1952#define VCPU_THREAD_NAME_SIZE 16
1953
e5ab30a2 1954static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1955{
4900116e 1956 char thread_name[VCPU_THREAD_NAME_SIZE];
37257942
AB
1957 static QemuCond *single_tcg_halt_cond;
1958 static QemuThread *single_tcg_cpu_thread;
e8feb96f
EC
1959 static int tcg_region_inited;
1960
f28d0dfd 1961 assert(tcg_enabled());
e8feb96f
EC
1962 /*
1963 * Initialize TCG regions--once. Now is a good time, because:
1964 * (1) TCG's init context, prologue and target globals have been set up.
1965 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1966 * -accel flag is processed, so the check doesn't work then).
1967 */
1968 if (!tcg_region_inited) {
1969 tcg_region_inited = 1;
1970 tcg_region_init();
1971 }
4900116e 1972
37257942 1973 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
814e612e 1974 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1975 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1976 qemu_cond_init(cpu->halt_cond);
37257942
AB
1977
1978 if (qemu_tcg_mttcg_enabled()) {
1979 /* create a thread per vCPU with TCG (MTTCG) */
1980 parallel_cpus = true;
1981 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
4900116e 1982 cpu->cpu_index);
37257942
AB
1983
1984 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1985 cpu, QEMU_THREAD_JOINABLE);
1986
1987 } else {
1988 /* share a single thread for all cpus with TCG */
1989 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1990 qemu_thread_create(cpu->thread, thread_name,
1991 qemu_tcg_rr_cpu_thread_fn,
1992 cpu, QEMU_THREAD_JOINABLE);
1993
1994 single_tcg_halt_cond = cpu->halt_cond;
1995 single_tcg_cpu_thread = cpu->thread;
1996 }
1ecf47bf 1997#ifdef _WIN32
814e612e 1998 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1999#endif
296af7c9 2000 } else {
37257942
AB
2001 /* For non-MTTCG cases we share the thread */
2002 cpu->thread = single_tcg_cpu_thread;
2003 cpu->halt_cond = single_tcg_halt_cond;
a342173a
DH
2004 cpu->thread_id = first_cpu->thread_id;
2005 cpu->can_do_io = 1;
2006 cpu->created = true;
296af7c9
BS
2007 }
2008}
2009
b0cb0a66
VP
2010static void qemu_hax_start_vcpu(CPUState *cpu)
2011{
2012 char thread_name[VCPU_THREAD_NAME_SIZE];
2013
2014 cpu->thread = g_malloc0(sizeof(QemuThread));
2015 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2016 qemu_cond_init(cpu->halt_cond);
2017
2018 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
2019 cpu->cpu_index);
2020 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
2021 cpu, QEMU_THREAD_JOINABLE);
2022#ifdef _WIN32
2023 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2024#endif
b0cb0a66
VP
2025}
2026
48a106bd 2027static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 2028{
4900116e
DDAG
2029 char thread_name[VCPU_THREAD_NAME_SIZE];
2030
814e612e 2031 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
2032 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2033 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
2034 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2035 cpu->cpu_index);
2036 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2037 cpu, QEMU_THREAD_JOINABLE);
296af7c9
BS
2038}
2039
c97d6d2c
SAGDR
2040static void qemu_hvf_start_vcpu(CPUState *cpu)
2041{
2042 char thread_name[VCPU_THREAD_NAME_SIZE];
2043
2044 /* HVF currently does not support TCG, and only runs in
2045 * unrestricted-guest mode. */
2046 assert(hvf_enabled());
2047
2048 cpu->thread = g_malloc0(sizeof(QemuThread));
2049 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2050 qemu_cond_init(cpu->halt_cond);
2051
2052 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2053 cpu->cpu_index);
2054 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2055 cpu, QEMU_THREAD_JOINABLE);
c97d6d2c
SAGDR
2056}
2057
19306806
JTV
2058static void qemu_whpx_start_vcpu(CPUState *cpu)
2059{
2060 char thread_name[VCPU_THREAD_NAME_SIZE];
2061
2062 cpu->thread = g_malloc0(sizeof(QemuThread));
2063 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2064 qemu_cond_init(cpu->halt_cond);
2065 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2066 cpu->cpu_index);
2067 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2068 cpu, QEMU_THREAD_JOINABLE);
2069#ifdef _WIN32
2070 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2071#endif
19306806
JTV
2072}
2073
10a9021d 2074static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 2075{
4900116e
DDAG
2076 char thread_name[VCPU_THREAD_NAME_SIZE];
2077
814e612e 2078 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
2079 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2080 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
2081 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2082 cpu->cpu_index);
2083 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 2084 QEMU_THREAD_JOINABLE);
c7f0f3b1
AL
2085}
2086
c643bed9 2087void qemu_init_vcpu(CPUState *cpu)
296af7c9 2088{
5cc8767d
LX
2089 MachineState *ms = MACHINE(qdev_get_machine());
2090
2091 cpu->nr_cores = ms->smp.cores;
2092 cpu->nr_threads = ms->smp.threads;
f324e766 2093 cpu->stopped = true;
9c09a251 2094 cpu->random_seed = qemu_guest_random_seed_thread_part1();
56943e8c
PM
2095
2096 if (!cpu->as) {
2097 /* If the target cpu hasn't set up any address spaces itself,
2098 * give it the default one.
2099 */
12ebc9a7 2100 cpu->num_ases = 1;
80ceb07a 2101 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
56943e8c
PM
2102 }
2103
0ab07c62 2104 if (kvm_enabled()) {
48a106bd 2105 qemu_kvm_start_vcpu(cpu);
b0cb0a66
VP
2106 } else if (hax_enabled()) {
2107 qemu_hax_start_vcpu(cpu);
c97d6d2c
SAGDR
2108 } else if (hvf_enabled()) {
2109 qemu_hvf_start_vcpu(cpu);
c7f0f3b1 2110 } else if (tcg_enabled()) {
e5ab30a2 2111 qemu_tcg_init_vcpu(cpu);
19306806
JTV
2112 } else if (whpx_enabled()) {
2113 qemu_whpx_start_vcpu(cpu);
c7f0f3b1 2114 } else {
10a9021d 2115 qemu_dummy_start_vcpu(cpu);
0ab07c62 2116 }
81e96311
DH
2117
2118 while (!cpu->created) {
2119 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2120 }
296af7c9
BS
2121}
2122
b4a3d965 2123void cpu_stop_current(void)
296af7c9 2124{
4917cf44 2125 if (current_cpu) {
0ec7e677
PM
2126 current_cpu->stop = true;
2127 cpu_exit(current_cpu);
b4a3d965 2128 }
296af7c9
BS
2129}
2130
56983463 2131int vm_stop(RunState state)
296af7c9 2132{
aa723c23 2133 if (qemu_in_vcpu_thread()) {
74892d24 2134 qemu_system_vmstop_request_prepare();
1dfb4dd9 2135 qemu_system_vmstop_request(state);
296af7c9
BS
2136 /*
2137 * FIXME: should not return to device code in case
2138 * vm_stop() has been requested.
2139 */
b4a3d965 2140 cpu_stop_current();
56983463 2141 return 0;
296af7c9 2142 }
56983463 2143
4486e89c 2144 return do_vm_stop(state, true);
296af7c9
BS
2145}
2146
2d76e823
CI
2147/**
2148 * Prepare for (re)starting the VM.
2149 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2150 * running or in case of an error condition), 0 otherwise.
2151 */
2152int vm_prepare_start(void)
2153{
2154 RunState requested;
2d76e823
CI
2155
2156 qemu_vmstop_requested(&requested);
2157 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2158 return -1;
2159 }
2160
2161 /* Ensure that a STOP/RESUME pair of events is emitted if a
2162 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2163 * example, according to documentation is always followed by
2164 * the STOP event.
2165 */
2166 if (runstate_is_running()) {
3ab72385
PX
2167 qapi_event_send_stop();
2168 qapi_event_send_resume();
f056158d 2169 return -1;
2d76e823
CI
2170 }
2171
2172 /* We are sending this now, but the CPUs will be resumed shortly later */
3ab72385 2173 qapi_event_send_resume();
f056158d
MA
2174
2175 replay_enable_events();
2176 cpu_enable_ticks();
2177 runstate_set(RUN_STATE_RUNNING);
2178 vm_state_notify(1, RUN_STATE_RUNNING);
2179 return 0;
2d76e823
CI
2180}
2181
/*
 * Start the VM: resume all vCPUs, but only when vm_prepare_start()
 * returns 0.
 */
void vm_start(void)
{
    if (vm_prepare_start() == 0) {
        resume_all_vcpus();
    }
}
2188
8a9236f1
LC
2189/* does a state transition even if the VM is already stopped,
2190 current state is forgotten forever */
56983463 2191int vm_stop_force_state(RunState state)
8a9236f1
LC
2192{
2193 if (runstate_is_running()) {
56983463 2194 return vm_stop(state);
8a9236f1
LC
2195 } else {
2196 runstate_set(state);
b2780d32
WC
2197
2198 bdrv_drain_all();
594a45ce
KW
2199 /* Make sure to return an error if the flush in a previous vm_stop()
2200 * failed. */
22af08ea 2201 return bdrv_flush_all();
8a9236f1
LC
2202 }
2203}
2204
/*
 * Invoke the target's cpu_list() when the cpu_list macro is defined; do
 * nothing otherwise. @optarg is not used.
 */
void list_cpus(const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#ifdef cpu_list
    cpu_list();
#endif
}
de0b36b6 2212
0cfd6a9a
LC
2213void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2214 bool has_cpu, int64_t cpu_index, Error **errp)
2215{
2216 FILE *f;
2217 uint32_t l;
55e5c285 2218 CPUState *cpu;
0cfd6a9a 2219 uint8_t buf[1024];
0dc9daf0 2220 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
2221
2222 if (!has_cpu) {
2223 cpu_index = 0;
2224 }
2225
151d1322
AF
2226 cpu = qemu_get_cpu(cpu_index);
2227 if (cpu == NULL) {
c6bd8c70
MA
2228 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2229 "a CPU number");
0cfd6a9a
LC
2230 return;
2231 }
2232
2233 f = fopen(filename, "wb");
2234 if (!f) {
618da851 2235 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
2236 return;
2237 }
2238
2239 while (size != 0) {
2240 l = sizeof(buf);
2241 if (l > size)
2242 l = size;
2f4d0f59 2243 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
2244 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2245 " specified", orig_addr, orig_size);
2f4d0f59
AK
2246 goto exit;
2247 }
0cfd6a9a 2248 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 2249 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
2250 goto exit;
2251 }
2252 addr += l;
2253 size -= l;
2254 }
2255
2256exit:
2257 fclose(f);
2258}
6d3962bf
LC
2259
2260void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2261 Error **errp)
2262{
2263 FILE *f;
2264 uint32_t l;
2265 uint8_t buf[1024];
2266
2267 f = fopen(filename, "wb");
2268 if (!f) {
618da851 2269 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
2270 return;
2271 }
2272
2273 while (size != 0) {
2274 l = sizeof(buf);
2275 if (l > size)
2276 l = size;
eb6282f2 2277 cpu_physical_memory_read(addr, buf, l);
6d3962bf 2278 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 2279 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
2280 goto exit;
2281 }
2282 addr += l;
2283 size -= l;
2284 }
2285
2286exit:
2287 fclose(f);
2288}
ab49ab5c
LC
2289
2290void qmp_inject_nmi(Error **errp)
2291{
9cb805fd 2292 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c 2293}
27498bef 2294
76c86615 2295void dump_drift_info(void)
27498bef
ST
2296{
2297 if (!use_icount) {
2298 return;
2299 }
2300
76c86615 2301 qemu_printf("Host - Guest clock %"PRIi64" ms\n",
27498bef
ST
2302 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2303 if (icount_align_option) {
76c86615
MA
2304 qemu_printf("Max guest delay %"PRIi64" ms\n",
2305 -max_delay / SCALE_MS);
2306 qemu_printf("Max guest advance %"PRIi64" ms\n",
2307 max_advance / SCALE_MS);
27498bef 2308 } else {
76c86615
MA
2309 qemu_printf("Max guest delay NA\n");
2310 qemu_printf("Max guest advance NA\n");
27498bef
ST
2311 }
2312}