]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
scripts/qemu-gdb/timers.py: new helper to dump timer state
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
7b31bbc2 26#include "qemu/osdep.h"
33c11879 27#include "qemu-common.h"
8d4e9146 28#include "qemu/config-file.h"
33c11879 29#include "cpu.h"
83c9089e 30#include "monitor/monitor.h"
a4e15de9 31#include "qapi/qmp/qerror.h"
d49b6836 32#include "qemu/error-report.h"
9c17d615 33#include "sysemu/sysemu.h"
da31d594 34#include "sysemu/block-backend.h"
022c62cb 35#include "exec/gdbstub.h"
9c17d615 36#include "sysemu/dma.h"
b3946626 37#include "sysemu/hw_accel.h"
9c17d615 38#include "sysemu/kvm.h"
b0cb0a66 39#include "sysemu/hax.h"
c97d6d2c 40#include "sysemu/hvf.h"
de0b36b6 41#include "qmp-commands.h"
63c91552 42#include "exec/exec-all.h"
296af7c9 43
1de7afc9 44#include "qemu/thread.h"
9c17d615
PB
45#include "sysemu/cpus.h"
46#include "sysemu/qtest.h"
1de7afc9
PB
47#include "qemu/main-loop.h"
48#include "qemu/bitmap.h"
cb365646 49#include "qemu/seqlock.h"
8d4e9146 50#include "tcg.h"
a4e15de9 51#include "qapi-event.h"
9cb805fd 52#include "hw/nmi.h"
8b427044 53#include "sysemu/replay.h"
afed5a5a 54#include "hw/boards.h"
0ff0fc19 55
6d9cb73c
JK
56#ifdef CONFIG_LINUX
57
58#include <sys/prctl.h>
59
c0532a76
MT
60#ifndef PR_MCE_KILL
61#define PR_MCE_KILL 33
62#endif
63
6d9cb73c
JK
64#ifndef PR_MCE_KILL_SET
65#define PR_MCE_KILL_SET 1
66#endif
67
68#ifndef PR_MCE_KILL_EARLY
69#define PR_MCE_KILL_EARLY 1
70#endif
71
72#endif /* CONFIG_LINUX */
73
27498bef
ST
74int64_t max_delay;
75int64_t max_advance;
296af7c9 76
2adcc85d
JH
77/* vcpu throttling controls */
78static QEMUTimer *throttle_timer;
79static unsigned int throttle_percentage;
80
81#define CPU_THROTTLE_PCT_MIN 1
82#define CPU_THROTTLE_PCT_MAX 99
83#define CPU_THROTTLE_TIMESLICE_NS 10000000
84
321bc0b2
TC
85bool cpu_is_stopped(CPUState *cpu)
86{
87 return cpu->stopped || !runstate_is_running();
88}
89
a98ae1d8 90static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 91{
c64ca814 92 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
93 return false;
94 }
321bc0b2 95 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
96 return true;
97 }
8c2e1b00 98 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 99 kvm_halt_in_kernel()) {
ac873f1e
PM
100 return false;
101 }
102 return true;
103}
104
105static bool all_cpu_threads_idle(void)
106{
182735ef 107 CPUState *cpu;
ac873f1e 108
bdc44640 109 CPU_FOREACH(cpu) {
182735ef 110 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
111 return false;
112 }
113 }
114 return true;
115}
116
946fb27c
PB
117/***********************************************************/
118/* guest cycle counter */
119
a3270e19
PB
120/* Protected by TimersState seqlock */
121
5045e9d9 122static bool icount_sleep = true;
71468395 123static int64_t vm_clock_warp_start = -1;
946fb27c
PB
124/* Conversion factor from emulated instructions to virtual clock ticks. */
125static int icount_time_shift;
126/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
127#define MAX_ICOUNT_SHIFT 10
a3270e19 128
946fb27c
PB
129static QEMUTimer *icount_rt_timer;
130static QEMUTimer *icount_vm_timer;
131static QEMUTimer *icount_warp_timer;
946fb27c
PB
132
133typedef struct TimersState {
cb365646 134 /* Protected by BQL. */
946fb27c
PB
135 int64_t cpu_ticks_prev;
136 int64_t cpu_ticks_offset;
cb365646
LPF
137
138 /* cpu_clock_offset can be read out of BQL, so protect it with
139 * this lock.
140 */
141 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
142 int64_t cpu_clock_offset;
143 int32_t cpu_ticks_enabled;
144 int64_t dummy;
c96778bb
FK
145
146 /* Compensate for varying guest execution speed. */
147 int64_t qemu_icount_bias;
148 /* Only written by TCG thread */
149 int64_t qemu_icount;
946fb27c
PB
150} TimersState;
151
d9cd4007 152static TimersState timers_state;
8d4e9146
FK
153bool mttcg_enabled;
154
155/*
156 * We default to false if we know other options have been enabled
157 * which are currently incompatible with MTTCG. Otherwise when each
158 * guest (target) has been updated to support:
159 * - atomic instructions
160 * - memory ordering primitives (barriers)
161 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
162 *
163 * Once a guest architecture has been converted to the new primitives
164 * there are two remaining limitations to check.
165 *
166 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
167 * - The host must have a stronger memory order than the guest
168 *
169 * It may be possible in future to support strong guests on weak hosts
170 * but that will require tagging all load/stores in a guest with their
171 * implicit memory order requirements which would likely slow things
172 * down a lot.
173 */
174
/*
 * Report whether every ordering bit the guest requires is also provided
 * by the host.  When either default-memory-order macro is missing the
 * answer is conservatively "no".
 */
static bool check_tcg_memory_orders_compatible(void)
{
    bool compatible = false;

#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    compatible = (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#endif
    return compatible;
}
183
/*
 * Default MTTCG setting when the user gave no "thread" option: enabled
 * only for targets that declare support, without icount, without an
 * oversized guest, and with compatible memory ordering.
 */
static bool default_mttcg_enabled(void)
{
#ifdef TARGET_SUPPORTS_MTTCG
    if (!use_icount && !TCG_OVERSIZED_GUEST) {
        return check_tcg_memory_orders_compatible();
    }
#endif
    return false;
}
196
197void qemu_tcg_configure(QemuOpts *opts, Error **errp)
198{
199 const char *t = qemu_opt_get(opts, "thread");
200 if (t) {
201 if (strcmp(t, "multi") == 0) {
202 if (TCG_OVERSIZED_GUEST) {
203 error_setg(errp, "No MTTCG when guest word size > hosts");
83fd9629
AB
204 } else if (use_icount) {
205 error_setg(errp, "No MTTCG when icount is enabled");
8d4e9146 206 } else {
86953503 207#ifndef TARGET_SUPPORTS_MTTCG
c34c7620
AB
208 error_report("Guest not yet converted to MTTCG - "
209 "you may get unexpected results");
210#endif
8d4e9146
FK
211 if (!check_tcg_memory_orders_compatible()) {
212 error_report("Guest expects a stronger memory ordering "
213 "than the host provides");
8cfef892 214 error_printf("This may cause strange/hard to debug errors\n");
8d4e9146
FK
215 }
216 mttcg_enabled = true;
217 }
218 } else if (strcmp(t, "single") == 0) {
219 mttcg_enabled = false;
220 } else {
221 error_setg(errp, "Invalid 'thread' setting %s", t);
222 }
223 } else {
224 mttcg_enabled = default_mttcg_enabled();
225 }
226}
946fb27c 227
e4cd9657
AB
228/* The current number of executed instructions is based on what we
229 * originally budgeted minus the current state of the decrementing
230 * icount counters in extra/u16.low.
231 */
232static int64_t cpu_get_icount_executed(CPUState *cpu)
233{
234 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
235}
236
512d3c80
AB
237/*
238 * Update the global shared timer_state.qemu_icount to take into
239 * account executed instructions. This is done by the TCG vCPU
240 * thread so the main-loop can see time has moved forward.
241 */
242void cpu_update_icount(CPUState *cpu)
243{
244 int64_t executed = cpu_get_icount_executed(cpu);
245 cpu->icount_budget -= executed;
246
247#ifdef CONFIG_ATOMIC64
248 atomic_set__nocheck(&timers_state.qemu_icount,
249 atomic_read__nocheck(&timers_state.qemu_icount) +
250 executed);
251#else /* FIXME: we need 64bit atomics to do this safely */
252 timers_state.qemu_icount += executed;
253#endif
254}
255
2a62914b 256int64_t cpu_get_icount_raw(void)
946fb27c 257{
4917cf44 258 CPUState *cpu = current_cpu;
946fb27c 259
243c5f77 260 if (cpu && cpu->running) {
414b15c9 261 if (!cpu->can_do_io) {
2a62914b
PD
262 fprintf(stderr, "Bad icount read\n");
263 exit(1);
946fb27c 264 }
e4cd9657 265 /* Take into account what has run */
1d05906b 266 cpu_update_icount(cpu);
946fb27c 267 }
1d05906b
AB
268#ifdef CONFIG_ATOMIC64
269 return atomic_read__nocheck(&timers_state.qemu_icount);
270#else /* FIXME: we need 64bit atomics to do this safely */
271 return timers_state.qemu_icount;
272#endif
2a62914b
PD
273}
274
275/* Return the virtual CPU time, based on the instruction counter. */
276static int64_t cpu_get_icount_locked(void)
277{
278 int64_t icount = cpu_get_icount_raw();
3f031313 279 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
280}
281
17a15f1b
PB
282int64_t cpu_get_icount(void)
283{
284 int64_t icount;
285 unsigned start;
286
287 do {
288 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
289 icount = cpu_get_icount_locked();
290 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
291
292 return icount;
293}
294
3f031313
FK
295int64_t cpu_icount_to_ns(int64_t icount)
296{
297 return icount << icount_time_shift;
298}
299
d90f3cca
C
300/* return the time elapsed in VM between vm_start and vm_stop. Unless
301 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
302 * counter.
303 *
304 * Caller must hold the BQL
305 */
946fb27c
PB
306int64_t cpu_get_ticks(void)
307{
5f3e3101
PB
308 int64_t ticks;
309
946fb27c
PB
310 if (use_icount) {
311 return cpu_get_icount();
312 }
5f3e3101
PB
313
314 ticks = timers_state.cpu_ticks_offset;
315 if (timers_state.cpu_ticks_enabled) {
4a7428c5 316 ticks += cpu_get_host_ticks();
5f3e3101
PB
317 }
318
319 if (timers_state.cpu_ticks_prev > ticks) {
320 /* Note: non increasing ticks may happen if the host uses
321 software suspend */
322 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
323 ticks = timers_state.cpu_ticks_prev;
946fb27c 324 }
5f3e3101
PB
325
326 timers_state.cpu_ticks_prev = ticks;
327 return ticks;
946fb27c
PB
328}
329
cb365646 330static int64_t cpu_get_clock_locked(void)
946fb27c 331{
1d45cea5 332 int64_t time;
cb365646 333
1d45cea5 334 time = timers_state.cpu_clock_offset;
5f3e3101 335 if (timers_state.cpu_ticks_enabled) {
1d45cea5 336 time += get_clock();
946fb27c 337 }
cb365646 338
1d45cea5 339 return time;
cb365646
LPF
340}
341
d90f3cca 342/* Return the monotonic time elapsed in VM, i.e.,
8212ff86
PM
343 * the time between vm_start and vm_stop
344 */
cb365646
LPF
345int64_t cpu_get_clock(void)
346{
347 int64_t ti;
348 unsigned start;
349
350 do {
351 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
352 ti = cpu_get_clock_locked();
353 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
354
355 return ti;
946fb27c
PB
356}
357
cb365646 358/* enable cpu_get_ticks()
3224e878 359 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 360 */
946fb27c
PB
361void cpu_enable_ticks(void)
362{
cb365646 363 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 364 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 365 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 366 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
367 timers_state.cpu_clock_offset -= get_clock();
368 timers_state.cpu_ticks_enabled = 1;
369 }
03719e44 370 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
371}
372
373/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646 374 * cpu_get_ticks() after that.
3224e878 375 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 376 */
946fb27c
PB
377void cpu_disable_ticks(void)
378{
cb365646 379 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 380 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 381 if (timers_state.cpu_ticks_enabled) {
4a7428c5 382 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 383 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
384 timers_state.cpu_ticks_enabled = 0;
385 }
03719e44 386 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
387}
388
389/* Correlation between real and virtual time is always going to be
390 fairly approximate, so ignore small variation.
391 When the guest is idle real and virtual time will be aligned in
392 the IO wait loop. */
73bcb24d 393#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
394
395static void icount_adjust(void)
396{
397 int64_t cur_time;
398 int64_t cur_icount;
399 int64_t delta;
a3270e19
PB
400
401 /* Protected by TimersState mutex. */
946fb27c 402 static int64_t last_delta;
468cc7cf 403
946fb27c
PB
404 /* If the VM is not running, then do nothing. */
405 if (!runstate_is_running()) {
406 return;
407 }
468cc7cf 408
03719e44 409 seqlock_write_begin(&timers_state.vm_clock_seqlock);
17a15f1b
PB
410 cur_time = cpu_get_clock_locked();
411 cur_icount = cpu_get_icount_locked();
468cc7cf 412
946fb27c
PB
413 delta = cur_icount - cur_time;
414 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
415 if (delta > 0
416 && last_delta + ICOUNT_WOBBLE < delta * 2
417 && icount_time_shift > 0) {
418 /* The guest is getting too far ahead. Slow time down. */
419 icount_time_shift--;
420 }
421 if (delta < 0
422 && last_delta - ICOUNT_WOBBLE > delta * 2
423 && icount_time_shift < MAX_ICOUNT_SHIFT) {
424 /* The guest is getting too far behind. Speed time up. */
425 icount_time_shift++;
426 }
427 last_delta = delta;
c96778bb
FK
428 timers_state.qemu_icount_bias = cur_icount
429 - (timers_state.qemu_icount << icount_time_shift);
03719e44 430 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
431}
432
433static void icount_adjust_rt(void *opaque)
434{
40daca54 435 timer_mod(icount_rt_timer,
1979b908 436 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
437 icount_adjust();
438}
439
440static void icount_adjust_vm(void *opaque)
441{
40daca54
AB
442 timer_mod(icount_vm_timer,
443 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 444 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
445 icount_adjust();
446}
447
448static int64_t qemu_icount_round(int64_t count)
449{
450 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
451}
452
efab87cf 453static void icount_warp_rt(void)
946fb27c 454{
ccffff48
AB
455 unsigned seq;
456 int64_t warp_start;
457
17a15f1b
PB
458 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
459 * changes from -1 to another value, so the race here is okay.
460 */
ccffff48
AB
461 do {
462 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
463 warp_start = vm_clock_warp_start;
464 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
465
466 if (warp_start == -1) {
946fb27c
PB
467 return;
468 }
469
03719e44 470 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 471 if (runstate_is_running()) {
8eda206e
PD
472 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
473 cpu_get_clock_locked());
8ed961d9
PB
474 int64_t warp_delta;
475
476 warp_delta = clock - vm_clock_warp_start;
477 if (use_icount == 2) {
946fb27c 478 /*
40daca54 479 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
480 * far ahead of real time.
481 */
17a15f1b 482 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 483 int64_t delta = clock - cur_icount;
8ed961d9 484 warp_delta = MIN(warp_delta, delta);
946fb27c 485 }
c96778bb 486 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
487 }
488 vm_clock_warp_start = -1;
03719e44 489 seqlock_write_end(&timers_state.vm_clock_seqlock);
8ed961d9
PB
490
491 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
492 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
493 }
946fb27c
PB
494}
495
/* Callback of icount_warp_timer: account for the elapsed warp. */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
503
8156be56
PB
504void qtest_clock_warp(int64_t dest)
505{
40daca54 506 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 507 AioContext *aio_context;
8156be56 508 assert(qtest_enabled());
efef88b3 509 aio_context = qemu_get_aio_context();
8156be56 510 while (clock < dest) {
40daca54 511 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 512 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 513
03719e44 514 seqlock_write_begin(&timers_state.vm_clock_seqlock);
c96778bb 515 timers_state.qemu_icount_bias += warp;
03719e44 516 seqlock_write_end(&timers_state.vm_clock_seqlock);
17a15f1b 517
40daca54 518 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 519 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 520 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 521 }
40daca54 522 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
523}
524
e76d1798 525void qemu_start_warp_timer(void)
946fb27c 526{
ce78d18c 527 int64_t clock;
946fb27c
PB
528 int64_t deadline;
529
e76d1798 530 if (!use_icount) {
946fb27c
PB
531 return;
532 }
533
8bd7f71d
PD
534 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
535 * do not fire, so computing the deadline does not make sense.
536 */
537 if (!runstate_is_running()) {
538 return;
539 }
540
541 /* warp clock deterministically in record/replay mode */
e76d1798 542 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
8bd7f71d
PD
543 return;
544 }
545
ce78d18c 546 if (!all_cpu_threads_idle()) {
946fb27c
PB
547 return;
548 }
549
8156be56
PB
550 if (qtest_enabled()) {
551 /* When testing, qtest commands advance icount. */
e76d1798 552 return;
8156be56
PB
553 }
554
ac70aafc 555 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 556 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 557 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 558 if (deadline < 0) {
d7a0f71d
VC
559 static bool notified;
560 if (!icount_sleep && !notified) {
3dc6f869 561 warn_report("icount sleep disabled and no active timers");
d7a0f71d
VC
562 notified = true;
563 }
ce78d18c 564 return;
ac70aafc
AB
565 }
566
946fb27c
PB
567 if (deadline > 0) {
568 /*
40daca54 569 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
570 * sleep. Otherwise, the CPU might be waiting for a future timer
571 * interrupt to wake it up, but the interrupt never comes because
572 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 573 * QEMU_CLOCK_VIRTUAL.
946fb27c 574 */
5045e9d9
VC
575 if (!icount_sleep) {
576 /*
577 * We never let VCPUs sleep in no sleep icount mode.
578 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
579 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
580 * It is useful when we want a deterministic execution time,
581 * isolated from host latencies.
582 */
03719e44 583 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9 584 timers_state.qemu_icount_bias += deadline;
03719e44 585 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9
VC
586 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
587 } else {
588 /*
589 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
590 * "real" time, (related to the time left until the next event) has
591 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
592 * This avoids that the warps are visible externally; for example,
593 * you will not be sending network packets continuously instead of
594 * every 100ms.
595 */
03719e44 596 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9
VC
597 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
598 vm_clock_warp_start = clock;
599 }
03719e44 600 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9 601 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 602 }
ac70aafc 603 } else if (deadline == 0) {
40daca54 604 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
605 }
606}
607
e76d1798
PD
608static void qemu_account_warp_timer(void)
609{
610 if (!use_icount || !icount_sleep) {
611 return;
612 }
613
614 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
615 * do not fire, so computing the deadline does not make sense.
616 */
617 if (!runstate_is_running()) {
618 return;
619 }
620
621 /* warp clock deterministically in record/replay mode */
622 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
623 return;
624 }
625
626 timer_del(icount_warp_timer);
627 icount_warp_rt();
628}
629
d09eae37
FK
630static bool icount_state_needed(void *opaque)
631{
632 return use_icount;
633}
634
635/*
636 * This is a subsection for icount migration.
637 */
638static const VMStateDescription icount_vmstate_timers = {
639 .name = "timer/icount",
640 .version_id = 1,
641 .minimum_version_id = 1,
5cd8cada 642 .needed = icount_state_needed,
d09eae37
FK
643 .fields = (VMStateField[]) {
644 VMSTATE_INT64(qemu_icount_bias, TimersState),
645 VMSTATE_INT64(qemu_icount, TimersState),
646 VMSTATE_END_OF_LIST()
647 }
648};
649
946fb27c
PB
650static const VMStateDescription vmstate_timers = {
651 .name = "timer",
652 .version_id = 2,
653 .minimum_version_id = 1,
35d08458 654 .fields = (VMStateField[]) {
946fb27c
PB
655 VMSTATE_INT64(cpu_ticks_offset, TimersState),
656 VMSTATE_INT64(dummy, TimersState),
657 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
658 VMSTATE_END_OF_LIST()
d09eae37 659 },
5cd8cada
JQ
660 .subsections = (const VMStateDescription*[]) {
661 &icount_vmstate_timers,
662 NULL
946fb27c
PB
663 }
664};
665
14e6fe12 666static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
2adcc85d 667{
2adcc85d
JH
668 double pct;
669 double throttle_ratio;
670 long sleeptime_ns;
671
672 if (!cpu_throttle_get_percentage()) {
673 return;
674 }
675
676 pct = (double)cpu_throttle_get_percentage()/100;
677 throttle_ratio = pct / (1 - pct);
678 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
679
680 qemu_mutex_unlock_iothread();
2adcc85d
JH
681 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
682 qemu_mutex_lock_iothread();
90bb0c04 683 atomic_set(&cpu->throttle_thread_scheduled, 0);
2adcc85d
JH
684}
685
686static void cpu_throttle_timer_tick(void *opaque)
687{
688 CPUState *cpu;
689 double pct;
690
691 /* Stop the timer if needed */
692 if (!cpu_throttle_get_percentage()) {
693 return;
694 }
695 CPU_FOREACH(cpu) {
696 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
14e6fe12
PB
697 async_run_on_cpu(cpu, cpu_throttle_thread,
698 RUN_ON_CPU_NULL);
2adcc85d
JH
699 }
700 }
701
702 pct = (double)cpu_throttle_get_percentage()/100;
703 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
704 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
705}
706
707void cpu_throttle_set(int new_throttle_pct)
708{
709 /* Ensure throttle percentage is within valid range */
710 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
711 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
712
713 atomic_set(&throttle_percentage, new_throttle_pct);
714
715 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
716 CPU_THROTTLE_TIMESLICE_NS);
717}
718
719void cpu_throttle_stop(void)
720{
721 atomic_set(&throttle_percentage, 0);
722}
723
/* True while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
728
729int cpu_throttle_get_percentage(void)
730{
731 return atomic_read(&throttle_percentage);
732}
733
4603ea01
PD
734void cpu_ticks_init(void)
735{
ccdb3c1f 736 seqlock_init(&timers_state.vm_clock_seqlock);
4603ea01 737 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
738 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
739 cpu_throttle_timer_tick, NULL);
4603ea01
PD
740}
741
1ad9580b 742void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 743{
1ad9580b 744 const char *option;
a8bfac37 745 char *rem_str = NULL;
1ad9580b 746
1ad9580b 747 option = qemu_opt_get(opts, "shift");
946fb27c 748 if (!option) {
a8bfac37
ST
749 if (qemu_opt_get(opts, "align") != NULL) {
750 error_setg(errp, "Please specify shift option when using align");
751 }
946fb27c
PB
752 return;
753 }
f1f4b57e
VC
754
755 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
756 if (icount_sleep) {
757 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
e76d1798 758 icount_timer_cb, NULL);
5045e9d9 759 }
f1f4b57e 760
a8bfac37 761 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
762
763 if (icount_align_option && !icount_sleep) {
778d9f9b 764 error_setg(errp, "align=on and sleep=off are incompatible");
f1f4b57e 765 }
946fb27c 766 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
767 errno = 0;
768 icount_time_shift = strtol(option, &rem_str, 0);
769 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
770 error_setg(errp, "icount: Invalid shift value");
771 }
946fb27c
PB
772 use_icount = 1;
773 return;
a8bfac37
ST
774 } else if (icount_align_option) {
775 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e 776 } else if (!icount_sleep) {
778d9f9b 777 error_setg(errp, "shift=auto and sleep=off are incompatible");
946fb27c
PB
778 }
779
780 use_icount = 2;
781
782 /* 125MIPS seems a reasonable initial guess at the guest speed.
783 It will be corrected fairly quickly anyway. */
784 icount_time_shift = 3;
785
786 /* Have both realtime and virtual time triggers for speed adjustment.
787 The realtime trigger catches emulated time passing too slowly,
788 the virtual time trigger catches emulated time passing too fast.
789 Realtime triggers occur even when idle, so use them less frequently
790 than VM triggers. */
bf2a7ddb
PD
791 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
792 icount_adjust_rt, NULL);
40daca54 793 timer_mod(icount_rt_timer,
bf2a7ddb 794 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
795 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
796 icount_adjust_vm, NULL);
797 timer_mod(icount_vm_timer,
798 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 799 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
800}
801
6546706d
AB
802/***********************************************************/
803/* TCG vCPU kick timer
804 *
805 * The kick timer is responsible for moving single threaded vCPU
806 * emulation on to the next vCPU. If more than one vCPU is running a
807 * timer event will force a cpu->exit so the next vCPU can get
808 * scheduled.
809 *
810 * The timer is removed if all vCPUs are idle and restarted again once
811 * idleness is complete.
812 */
813
814static QEMUTimer *tcg_kick_vcpu_timer;
791158d9 815static CPUState *tcg_current_rr_cpu;
6546706d
AB
816
817#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
818
819static inline int64_t qemu_tcg_next_kick(void)
820{
821 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
822}
823
791158d9
AB
824/* Kick the currently round-robin scheduled vCPU */
825static void qemu_cpu_kick_rr_cpu(void)
826{
827 CPUState *cpu;
791158d9
AB
828 do {
829 cpu = atomic_mb_read(&tcg_current_rr_cpu);
830 if (cpu) {
831 cpu_exit(cpu);
832 }
833 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
834}
835
6b8f0187
PB
836static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
837{
838}
839
3f53bc61
PB
840void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
841{
6b8f0187
PB
842 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
843 qemu_notify_event();
844 return;
845 }
846
847 if (!qemu_in_vcpu_thread() && first_cpu) {
848 /* qemu_cpu_kick is not enough to kick a halted CPU out of
849 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
850 * causes cpu_thread_is_idle to return false. This way,
851 * handle_icount_deadline can run.
852 */
853 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
854 }
3f53bc61
PB
855}
856
6546706d
AB
857static void kick_tcg_thread(void *opaque)
858{
859 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
791158d9 860 qemu_cpu_kick_rr_cpu();
6546706d
AB
861}
862
863static void start_tcg_kick_timer(void)
864{
37257942 865 if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
6546706d
AB
866 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
867 kick_tcg_thread, NULL);
868 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
869 }
870}
871
872static void stop_tcg_kick_timer(void)
873{
874 if (tcg_kick_vcpu_timer) {
875 timer_del(tcg_kick_vcpu_timer);
876 tcg_kick_vcpu_timer = NULL;
877 }
878}
879
296af7c9
BS
880/***********************************************************/
881void hw_error(const char *fmt, ...)
882{
883 va_list ap;
55e5c285 884 CPUState *cpu;
296af7c9
BS
885
886 va_start(ap, fmt);
887 fprintf(stderr, "qemu: hardware error: ");
888 vfprintf(stderr, fmt, ap);
889 fprintf(stderr, "\n");
bdc44640 890 CPU_FOREACH(cpu) {
55e5c285 891 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 892 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
893 }
894 va_end(ap);
895 abort();
896}
897
898void cpu_synchronize_all_states(void)
899{
182735ef 900 CPUState *cpu;
296af7c9 901
bdc44640 902 CPU_FOREACH(cpu) {
182735ef 903 cpu_synchronize_state(cpu);
c97d6d2c
SAGDR
904 /* TODO: move to cpu_synchronize_state() */
905 if (hvf_enabled()) {
906 hvf_cpu_synchronize_state(cpu);
907 }
296af7c9
BS
908 }
909}
910
911void cpu_synchronize_all_post_reset(void)
912{
182735ef 913 CPUState *cpu;
296af7c9 914
bdc44640 915 CPU_FOREACH(cpu) {
182735ef 916 cpu_synchronize_post_reset(cpu);
c97d6d2c
SAGDR
917 /* TODO: move to cpu_synchronize_post_reset() */
918 if (hvf_enabled()) {
919 hvf_cpu_synchronize_post_reset(cpu);
920 }
296af7c9
BS
921 }
922}
923
924void cpu_synchronize_all_post_init(void)
925{
182735ef 926 CPUState *cpu;
296af7c9 927
bdc44640 928 CPU_FOREACH(cpu) {
182735ef 929 cpu_synchronize_post_init(cpu);
c97d6d2c
SAGDR
930 /* TODO: move to cpu_synchronize_post_init() */
931 if (hvf_enabled()) {
932 hvf_cpu_synchronize_post_init(cpu);
933 }
296af7c9
BS
934 }
935}
936
75e972da
DG
937void cpu_synchronize_all_pre_loadvm(void)
938{
939 CPUState *cpu;
940
941 CPU_FOREACH(cpu) {
942 cpu_synchronize_pre_loadvm(cpu);
943 }
944}
945
56983463 946static int do_vm_stop(RunState state)
296af7c9 947{
56983463
KW
948 int ret = 0;
949
1354869c 950 if (runstate_is_running()) {
296af7c9 951 cpu_disable_ticks();
296af7c9 952 pause_all_vcpus();
f5bbfba1 953 runstate_set(state);
1dfb4dd9 954 vm_state_notify(0, state);
a4e15de9 955 qapi_event_send_stop(&error_abort);
296af7c9 956 }
56983463 957
594a45ce 958 bdrv_drain_all();
6d0ceb80 959 replay_disable_events();
22af08ea 960 ret = bdrv_flush_all();
594a45ce 961
56983463 962 return ret;
296af7c9
BS
963}
964
a1fcaa73 965static bool cpu_can_run(CPUState *cpu)
296af7c9 966{
4fdeee7c 967 if (cpu->stop) {
a1fcaa73 968 return false;
0ab07c62 969 }
321bc0b2 970 if (cpu_is_stopped(cpu)) {
a1fcaa73 971 return false;
0ab07c62 972 }
a1fcaa73 973 return true;
296af7c9
BS
974}
975
91325046 976static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 977{
64f6b346 978 gdb_set_stop_cpu(cpu);
8cf71710 979 qemu_system_debug_request();
f324e766 980 cpu->stopped = true;
3c638d06
JK
981}
982
6d9cb73c
JK
983#ifdef CONFIG_LINUX
984static void sigbus_reraise(void)
985{
986 sigset_t set;
987 struct sigaction action;
988
989 memset(&action, 0, sizeof(action));
990 action.sa_handler = SIG_DFL;
991 if (!sigaction(SIGBUS, &action, NULL)) {
992 raise(SIGBUS);
993 sigemptyset(&set);
994 sigaddset(&set, SIGBUS);
a2d1761d 995 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
6d9cb73c
JK
996 }
997 perror("Failed to re-raise SIGBUS!\n");
998 abort();
999}
1000
d98d4072 1001static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
6d9cb73c 1002{
a16fc07e
PB
1003 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1004 sigbus_reraise();
1005 }
1006
2ae41db2
PB
1007 if (current_cpu) {
1008 /* Called asynchronously in VCPU thread. */
1009 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1010 sigbus_reraise();
1011 }
1012 } else {
1013 /* Called synchronously (via signalfd) in main thread. */
1014 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1015 sigbus_reraise();
1016 }
6d9cb73c
JK
1017 }
1018}
1019
1020static void qemu_init_sigbus(void)
1021{
1022 struct sigaction action;
1023
1024 memset(&action, 0, sizeof(action));
1025 action.sa_flags = SA_SIGINFO;
d98d4072 1026 action.sa_sigaction = sigbus_handler;
6d9cb73c
JK
1027 sigaction(SIGBUS, &action, NULL);
1028
1029 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1030}
6d9cb73c 1031#else /* !CONFIG_LINUX */
6d9cb73c
JK
/* No SIGBUS setup is performed when CONFIG_LINUX is not defined. */
static void qemu_init_sigbus(void)
{
}
a16fc07e 1035#endif /* !CONFIG_LINUX */
ff48eb5f 1036
b2532d88 1037static QemuMutex qemu_global_mutex;
296af7c9
BS
1038
1039static QemuThread io_thread;
1040
296af7c9
BS
1041/* cpu creation */
1042static QemuCond qemu_cpu_cond;
1043/* system init */
296af7c9
BS
1044static QemuCond qemu_pause_cond;
1045
d3b12f5d 1046void qemu_init_cpu_loop(void)
296af7c9 1047{
6d9cb73c 1048 qemu_init_sigbus();
ed94592b 1049 qemu_cond_init(&qemu_cpu_cond);
ed94592b 1050 qemu_cond_init(&qemu_pause_cond);
296af7c9 1051 qemu_mutex_init(&qemu_global_mutex);
296af7c9 1052
b7680cb6 1053 qemu_thread_get_self(&io_thread);
296af7c9
BS
1054}
1055
14e6fe12 1056void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
e82bcec2 1057{
d148d90e 1058 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
3c02270d
CV
1059}
1060
4c055ab5
GZ
1061static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1062{
1063 if (kvm_destroy_vcpu(cpu) < 0) {
1064 error_report("kvm_destroy_vcpu failed");
1065 exit(EXIT_FAILURE);
1066 }
1067}
1068
1069static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1070{
1071}
1072
ebd05fea
DH
1073static void qemu_cpu_stop(CPUState *cpu, bool exit)
1074{
1075 g_assert(qemu_cpu_is_self(cpu));
1076 cpu->stop = false;
1077 cpu->stopped = true;
1078 if (exit) {
1079 cpu_exit(cpu);
1080 }
1081 qemu_cond_broadcast(&qemu_pause_cond);
1082}
1083
509a0d78 1084static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 1085{
37257942 1086 atomic_mb_set(&cpu->thread_kicked, false);
4fdeee7c 1087 if (cpu->stop) {
ebd05fea 1088 qemu_cpu_stop(cpu, false);
296af7c9 1089 }
a5403c69 1090 process_queued_cpu_work(cpu);
37257942
AB
1091}
1092
1093static bool qemu_tcg_should_sleep(CPUState *cpu)
1094{
1095 if (mttcg_enabled) {
1096 return cpu_thread_is_idle(cpu);
1097 } else {
1098 return all_cpu_threads_idle();
1099 }
296af7c9
BS
1100}
1101
d5f8d613 1102static void qemu_tcg_wait_io_event(CPUState *cpu)
296af7c9 1103{
37257942 1104 while (qemu_tcg_should_sleep(cpu)) {
6546706d 1105 stop_tcg_kick_timer();
d5f8d613 1106 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1107 }
296af7c9 1108
6546706d
AB
1109 start_tcg_kick_timer();
1110
37257942 1111 qemu_wait_io_event_common(cpu);
296af7c9
BS
1112}
1113
fd529e8f 1114static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 1115{
a98ae1d8 1116 while (cpu_thread_is_idle(cpu)) {
f5c121b8 1117 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1118 }
296af7c9 1119
509a0d78 1120 qemu_wait_io_event_common(cpu);
296af7c9
BS
1121}
1122
c97d6d2c
SAGDR
1123static void qemu_hvf_wait_io_event(CPUState *cpu)
1124{
1125 while (cpu_thread_is_idle(cpu)) {
1126 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1127 }
1128 qemu_wait_io_event_common(cpu);
1129}
1130
7e97cd88 1131static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 1132{
48a106bd 1133 CPUState *cpu = arg;
84b4915d 1134 int r;
296af7c9 1135
ab28bd23
PB
1136 rcu_register_thread();
1137
2e7f7a3c 1138 qemu_mutex_lock_iothread();
814e612e 1139 qemu_thread_get_self(cpu->thread);
9f09e18a 1140 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1141 cpu->can_do_io = 1;
4917cf44 1142 current_cpu = cpu;
296af7c9 1143
504134d2 1144 r = kvm_init_vcpu(cpu);
84b4915d
JK
1145 if (r < 0) {
1146 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1147 exit(1);
1148 }
296af7c9 1149
18268b60 1150 kvm_init_cpu_signals(cpu);
296af7c9
BS
1151
1152 /* signal CPU creation */
61a46217 1153 cpu->created = true;
296af7c9
BS
1154 qemu_cond_signal(&qemu_cpu_cond);
1155
4c055ab5 1156 do {
a1fcaa73 1157 if (cpu_can_run(cpu)) {
1458c363 1158 r = kvm_cpu_exec(cpu);
83f338f7 1159 if (r == EXCP_DEBUG) {
91325046 1160 cpu_handle_guest_debug(cpu);
83f338f7 1161 }
0ab07c62 1162 }
fd529e8f 1163 qemu_kvm_wait_io_event(cpu);
4c055ab5 1164 } while (!cpu->unplug || cpu_can_run(cpu));
296af7c9 1165
4c055ab5 1166 qemu_kvm_destroy_vcpu(cpu);
2c579042
BR
1167 cpu->created = false;
1168 qemu_cond_signal(&qemu_cpu_cond);
4c055ab5 1169 qemu_mutex_unlock_iothread();
296af7c9
BS
1170 return NULL;
1171}
1172
c7f0f3b1
AL
1173static void *qemu_dummy_cpu_thread_fn(void *arg)
1174{
1175#ifdef _WIN32
1176 fprintf(stderr, "qtest is not supported under Windows\n");
1177 exit(1);
1178#else
10a9021d 1179 CPUState *cpu = arg;
c7f0f3b1
AL
1180 sigset_t waitset;
1181 int r;
1182
ab28bd23
PB
1183 rcu_register_thread();
1184
c7f0f3b1 1185 qemu_mutex_lock_iothread();
814e612e 1186 qemu_thread_get_self(cpu->thread);
9f09e18a 1187 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1188 cpu->can_do_io = 1;
37257942 1189 current_cpu = cpu;
c7f0f3b1
AL
1190
1191 sigemptyset(&waitset);
1192 sigaddset(&waitset, SIG_IPI);
1193
1194 /* signal CPU creation */
61a46217 1195 cpu->created = true;
c7f0f3b1
AL
1196 qemu_cond_signal(&qemu_cpu_cond);
1197
c7f0f3b1 1198 while (1) {
c7f0f3b1
AL
1199 qemu_mutex_unlock_iothread();
1200 do {
1201 int sig;
1202 r = sigwait(&waitset, &sig);
1203 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1204 if (r == -1) {
1205 perror("sigwait");
1206 exit(1);
1207 }
1208 qemu_mutex_lock_iothread();
509a0d78 1209 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1210 }
1211
1212 return NULL;
1213#endif
1214}
1215
1be7fcb8
AB
1216static int64_t tcg_get_icount_limit(void)
1217{
1218 int64_t deadline;
1219
1220 if (replay_mode != REPLAY_MODE_PLAY) {
1221 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1222
1223 /* Maintain prior (possibly buggy) behaviour where if no deadline
1224 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1225 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1226 * nanoseconds.
1227 */
1228 if ((deadline < 0) || (deadline > INT32_MAX)) {
1229 deadline = INT32_MAX;
1230 }
1231
1232 return qemu_icount_round(deadline);
1233 } else {
1234 return replay_get_instructions();
1235 }
1236}
1237
12e9700d
AB
1238static void handle_icount_deadline(void)
1239{
6b8f0187 1240 assert(qemu_in_vcpu_thread());
12e9700d
AB
1241 if (use_icount) {
1242 int64_t deadline =
1243 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1244
1245 if (deadline == 0) {
6b8f0187 1246 /* Wake up other AioContexts. */
12e9700d 1247 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
6b8f0187 1248 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
12e9700d
AB
1249 }
1250 }
1251}
1252
05248382 1253static void prepare_icount_for_run(CPUState *cpu)
1be7fcb8 1254{
1be7fcb8 1255 if (use_icount) {
eda5f7c6 1256 int insns_left;
05248382
AB
1257
1258 /* These should always be cleared by process_icount_data after
1259 * each vCPU execution. However u16.high can be raised
1260 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1261 */
1262 g_assert(cpu->icount_decr.u16.low == 0);
1263 g_assert(cpu->icount_extra == 0);
1264
eda5f7c6
AB
1265 cpu->icount_budget = tcg_get_icount_limit();
1266 insns_left = MIN(0xffff, cpu->icount_budget);
1267 cpu->icount_decr.u16.low = insns_left;
1268 cpu->icount_extra = cpu->icount_budget - insns_left;
1be7fcb8 1269 }
05248382
AB
1270}
1271
1272static void process_icount_data(CPUState *cpu)
1273{
1be7fcb8 1274 if (use_icount) {
e4cd9657 1275 /* Account for executed instructions */
512d3c80 1276 cpu_update_icount(cpu);
05248382
AB
1277
1278 /* Reset the counters */
1279 cpu->icount_decr.u16.low = 0;
1be7fcb8 1280 cpu->icount_extra = 0;
e4cd9657
AB
1281 cpu->icount_budget = 0;
1282
1be7fcb8
AB
1283 replay_account_executed_instructions();
1284 }
05248382
AB
1285}
1286
1287
1288static int tcg_cpu_exec(CPUState *cpu)
1289{
1290 int ret;
1291#ifdef CONFIG_PROFILER
1292 int64_t ti;
1293#endif
1294
1295#ifdef CONFIG_PROFILER
1296 ti = profile_getclock();
1297#endif
1298 qemu_mutex_unlock_iothread();
1299 cpu_exec_start(cpu);
1300 ret = cpu_exec(cpu);
1301 cpu_exec_end(cpu);
1302 qemu_mutex_lock_iothread();
1303#ifdef CONFIG_PROFILER
1304 tcg_time += profile_getclock() - ti;
1305#endif
1be7fcb8
AB
1306 return ret;
1307}
1308
c93bbbef
AB
1309/* Destroy any remaining vCPUs which have been unplugged and have
1310 * finished running
1311 */
1312static void deal_with_unplugged_cpus(void)
1be7fcb8 1313{
c93bbbef 1314 CPUState *cpu;
1be7fcb8 1315
c93bbbef
AB
1316 CPU_FOREACH(cpu) {
1317 if (cpu->unplug && !cpu_can_run(cpu)) {
1318 qemu_tcg_destroy_vcpu(cpu);
1319 cpu->created = false;
1320 qemu_cond_signal(&qemu_cpu_cond);
1be7fcb8
AB
1321 break;
1322 }
1323 }
1be7fcb8 1324}
bdb7ca67 1325
6546706d
AB
1326/* Single-threaded TCG
1327 *
1328 * In the single-threaded case each vCPU is simulated in turn. If
1329 * there is more than a single vCPU we create a simple timer to kick
1330 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1331 * This is done explicitly rather than relying on side-effects
1332 * elsewhere.
1333 */
1334
37257942 1335static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
296af7c9 1336{
c3586ba7 1337 CPUState *cpu = arg;
296af7c9 1338
ab28bd23 1339 rcu_register_thread();
3468b59e 1340 tcg_register_thread();
ab28bd23 1341
2e7f7a3c 1342 qemu_mutex_lock_iothread();
814e612e 1343 qemu_thread_get_self(cpu->thread);
296af7c9 1344
38fcbd3f
AF
1345 CPU_FOREACH(cpu) {
1346 cpu->thread_id = qemu_get_thread_id();
1347 cpu->created = true;
626cf8f4 1348 cpu->can_do_io = 1;
38fcbd3f 1349 }
296af7c9
BS
1350 qemu_cond_signal(&qemu_cpu_cond);
1351
fa7d1867 1352 /* wait for initial kick-off after machine start */
c28e399c 1353 while (first_cpu->stopped) {
d5f8d613 1354 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1355
1356 /* process any pending work */
bdc44640 1357 CPU_FOREACH(cpu) {
37257942 1358 current_cpu = cpu;
182735ef 1359 qemu_wait_io_event_common(cpu);
8e564b4e 1360 }
0ab07c62 1361 }
296af7c9 1362
6546706d
AB
1363 start_tcg_kick_timer();
1364
c93bbbef
AB
1365 cpu = first_cpu;
1366
e5143e30
AB
1367 /* process any pending work */
1368 cpu->exit_request = 1;
1369
296af7c9 1370 while (1) {
c93bbbef
AB
1371 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1372 qemu_account_warp_timer();
1373
6b8f0187
PB
1374 /* Run the timers here. This is much more efficient than
1375 * waking up the I/O thread and waiting for completion.
1376 */
1377 handle_icount_deadline();
1378
c93bbbef
AB
1379 if (!cpu) {
1380 cpu = first_cpu;
1381 }
1382
e5143e30
AB
1383 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1384
791158d9 1385 atomic_mb_set(&tcg_current_rr_cpu, cpu);
37257942 1386 current_cpu = cpu;
c93bbbef
AB
1387
1388 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1389 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1390
1391 if (cpu_can_run(cpu)) {
1392 int r;
05248382
AB
1393
1394 prepare_icount_for_run(cpu);
1395
c93bbbef 1396 r = tcg_cpu_exec(cpu);
05248382
AB
1397
1398 process_icount_data(cpu);
1399
c93bbbef
AB
1400 if (r == EXCP_DEBUG) {
1401 cpu_handle_guest_debug(cpu);
1402 break;
08e73c48
PK
1403 } else if (r == EXCP_ATOMIC) {
1404 qemu_mutex_unlock_iothread();
1405 cpu_exec_step_atomic(cpu);
1406 qemu_mutex_lock_iothread();
1407 break;
c93bbbef 1408 }
37257942 1409 } else if (cpu->stop) {
c93bbbef
AB
1410 if (cpu->unplug) {
1411 cpu = CPU_NEXT(cpu);
1412 }
1413 break;
1414 }
1415
e5143e30
AB
1416 cpu = CPU_NEXT(cpu);
1417 } /* while (cpu && !cpu->exit_request).. */
1418
791158d9
AB
1419 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1420 atomic_set(&tcg_current_rr_cpu, NULL);
c93bbbef 1421
e5143e30
AB
1422 if (cpu && cpu->exit_request) {
1423 atomic_mb_set(&cpu->exit_request, 0);
1424 }
ac70aafc 1425
37257942 1426 qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
c93bbbef 1427 deal_with_unplugged_cpus();
296af7c9
BS
1428 }
1429
1430 return NULL;
1431}
1432
b0cb0a66
VP
1433static void *qemu_hax_cpu_thread_fn(void *arg)
1434{
1435 CPUState *cpu = arg;
1436 int r;
b3d3a426
VP
1437
1438 qemu_mutex_lock_iothread();
b0cb0a66 1439 qemu_thread_get_self(cpu->thread);
b0cb0a66
VP
1440
1441 cpu->thread_id = qemu_get_thread_id();
1442 cpu->created = true;
1443 cpu->halted = 0;
1444 current_cpu = cpu;
1445
1446 hax_init_vcpu(cpu);
1447 qemu_cond_signal(&qemu_cpu_cond);
1448
1449 while (1) {
1450 if (cpu_can_run(cpu)) {
1451 r = hax_smp_cpu_exec(cpu);
1452 if (r == EXCP_DEBUG) {
1453 cpu_handle_guest_debug(cpu);
1454 }
1455 }
1456
1457 while (cpu_thread_is_idle(cpu)) {
1458 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1459 }
1460#ifdef _WIN32
1461 SleepEx(0, TRUE);
1462#endif
1463 qemu_wait_io_event_common(cpu);
1464 }
1465 return NULL;
1466}
1467
c97d6d2c
SAGDR
1468/* The HVF-specific vCPU thread function. This one should only run when the host
1469 * CPU supports the VMX "unrestricted guest" feature. */
1470static void *qemu_hvf_cpu_thread_fn(void *arg)
1471{
1472 CPUState *cpu = arg;
1473
1474 int r;
1475
1476 assert(hvf_enabled());
1477
1478 rcu_register_thread();
1479
1480 qemu_mutex_lock_iothread();
1481 qemu_thread_get_self(cpu->thread);
1482
1483 cpu->thread_id = qemu_get_thread_id();
1484 cpu->can_do_io = 1;
1485 current_cpu = cpu;
1486
1487 hvf_init_vcpu(cpu);
1488
1489 /* signal CPU creation */
1490 cpu->created = true;
1491 qemu_cond_signal(&qemu_cpu_cond);
1492
1493 do {
1494 if (cpu_can_run(cpu)) {
1495 r = hvf_vcpu_exec(cpu);
1496 if (r == EXCP_DEBUG) {
1497 cpu_handle_guest_debug(cpu);
1498 }
1499 }
1500 qemu_hvf_wait_io_event(cpu);
1501 } while (!cpu->unplug || cpu_can_run(cpu));
1502
1503 hvf_vcpu_destroy(cpu);
1504 cpu->created = false;
1505 qemu_cond_signal(&qemu_cpu_cond);
1506 qemu_mutex_unlock_iothread();
1507 return NULL;
1508}
1509
b0cb0a66
VP
#ifdef _WIN32
/* Empty APC routine; queued by qemu_cpu_kick_thread() via QueueUserAPC(). */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif
1515
37257942
AB
1516/* Multi-threaded TCG
1517 *
1518 * In the multi-threaded case each vCPU has its own thread. The TLS
1519 * variable current_cpu can be used deep in the code to find the
1520 * current CPUState for a given thread.
1521 */
1522
1523static void *qemu_tcg_cpu_thread_fn(void *arg)
1524{
1525 CPUState *cpu = arg;
1526
bf51c720
AB
1527 g_assert(!use_icount);
1528
37257942 1529 rcu_register_thread();
3468b59e 1530 tcg_register_thread();
37257942
AB
1531
1532 qemu_mutex_lock_iothread();
1533 qemu_thread_get_self(cpu->thread);
1534
1535 cpu->thread_id = qemu_get_thread_id();
1536 cpu->created = true;
1537 cpu->can_do_io = 1;
1538 current_cpu = cpu;
1539 qemu_cond_signal(&qemu_cpu_cond);
1540
1541 /* process any pending work */
1542 cpu->exit_request = 1;
1543
1544 while (1) {
1545 if (cpu_can_run(cpu)) {
1546 int r;
1547 r = tcg_cpu_exec(cpu);
1548 switch (r) {
1549 case EXCP_DEBUG:
1550 cpu_handle_guest_debug(cpu);
1551 break;
1552 case EXCP_HALTED:
1553 /* during start-up the vCPU is reset and the thread is
1554 * kicked several times. If we don't ensure we go back
1555 * to sleep in the halted state we won't cleanly
1556 * start-up when the vCPU is enabled.
1557 *
1558 * cpu->halted should ensure we sleep in wait_io_event
1559 */
1560 g_assert(cpu->halted);
1561 break;
08e73c48
PK
1562 case EXCP_ATOMIC:
1563 qemu_mutex_unlock_iothread();
1564 cpu_exec_step_atomic(cpu);
1565 qemu_mutex_lock_iothread();
37257942
AB
1566 default:
1567 /* Ignore everything else? */
1568 break;
1569 }
a3e53273
BR
1570 } else if (cpu->unplug) {
1571 qemu_tcg_destroy_vcpu(cpu);
1572 cpu->created = false;
1573 qemu_cond_signal(&qemu_cpu_cond);
1574 qemu_mutex_unlock_iothread();
1575 return NULL;
37257942
AB
1576 }
1577
37257942
AB
1578 atomic_mb_set(&cpu->exit_request, 0);
1579 qemu_tcg_wait_io_event(cpu);
1580 }
1581
1582 return NULL;
1583}
1584
2ff09a40 1585static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1586{
1587#ifndef _WIN32
1588 int err;
1589
e0c38211
PB
1590 if (cpu->thread_kicked) {
1591 return;
9102deda 1592 }
e0c38211 1593 cpu->thread_kicked = true;
814e612e 1594 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1595 if (err) {
1596 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1597 exit(1);
1598 }
1599#else /* _WIN32 */
b0cb0a66
VP
1600 if (!qemu_cpu_is_self(cpu)) {
1601 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1602 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1603 __func__, GetLastError());
1604 exit(1);
1605 }
1606 }
e0c38211
PB
1607#endif
1608}
ed9164a3 1609
c08d7424 1610void qemu_cpu_kick(CPUState *cpu)
296af7c9 1611{
f5c121b8 1612 qemu_cond_broadcast(cpu->halt_cond);
e0c38211 1613 if (tcg_enabled()) {
791158d9 1614 cpu_exit(cpu);
37257942 1615 /* NOP unless doing single-thread RR */
791158d9 1616 qemu_cpu_kick_rr_cpu();
e0c38211 1617 } else {
b0cb0a66
VP
1618 if (hax_enabled()) {
1619 /*
1620 * FIXME: race condition with the exit_request check in
1621 * hax_vcpu_hax_exec
1622 */
1623 cpu->exit_request = 1;
1624 }
e0c38211
PB
1625 qemu_cpu_kick_thread(cpu);
1626 }
296af7c9
BS
1627}
1628
46d62fac 1629void qemu_cpu_kick_self(void)
296af7c9 1630{
4917cf44 1631 assert(current_cpu);
9102deda 1632 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1633}
1634
60e82579 1635bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1636{
814e612e 1637 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1638}
1639
79e2b9ae 1640bool qemu_in_vcpu_thread(void)
aa723c23 1641{
4917cf44 1642 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1643}
1644
afbe7053
PB
/*
 * Per-thread flag maintained by qemu_mutex_lock_iothread() and
 * qemu_mutex_unlock_iothread(); starts out false.
 */
static __thread bool iothread_locked;

/* Report whether the calling thread has taken the global mutex. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1651
296af7c9
BS
1652void qemu_mutex_lock_iothread(void)
1653{
8d04fb55
JK
1654 g_assert(!qemu_mutex_iothread_locked());
1655 qemu_mutex_lock(&qemu_global_mutex);
afbe7053 1656 iothread_locked = true;
296af7c9
BS
1657}
1658
1659void qemu_mutex_unlock_iothread(void)
1660{
8d04fb55 1661 g_assert(qemu_mutex_iothread_locked());
afbe7053 1662 iothread_locked = false;
296af7c9
BS
1663 qemu_mutex_unlock(&qemu_global_mutex);
1664}
1665
e8faee06 1666static bool all_vcpus_paused(void)
296af7c9 1667{
bdc44640 1668 CPUState *cpu;
296af7c9 1669
bdc44640 1670 CPU_FOREACH(cpu) {
182735ef 1671 if (!cpu->stopped) {
e8faee06 1672 return false;
0ab07c62 1673 }
296af7c9
BS
1674 }
1675
e8faee06 1676 return true;
296af7c9
BS
1677}
1678
1679void pause_all_vcpus(void)
1680{
bdc44640 1681 CPUState *cpu;
296af7c9 1682
40daca54 1683 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1684 CPU_FOREACH(cpu) {
ebd05fea
DH
1685 if (qemu_cpu_is_self(cpu)) {
1686 qemu_cpu_stop(cpu, true);
1687 } else {
1688 cpu->stop = true;
1689 qemu_cpu_kick(cpu);
1690 }
d798e974
JK
1691 }
1692
296af7c9 1693 while (!all_vcpus_paused()) {
be7d6c57 1694 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1695 CPU_FOREACH(cpu) {
182735ef 1696 qemu_cpu_kick(cpu);
296af7c9
BS
1697 }
1698 }
1699}
1700
2993683b
IM
1701void cpu_resume(CPUState *cpu)
1702{
1703 cpu->stop = false;
1704 cpu->stopped = false;
1705 qemu_cpu_kick(cpu);
1706}
1707
296af7c9
BS
1708void resume_all_vcpus(void)
1709{
bdc44640 1710 CPUState *cpu;
296af7c9 1711
40daca54 1712 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1713 CPU_FOREACH(cpu) {
182735ef 1714 cpu_resume(cpu);
296af7c9
BS
1715 }
1716}
1717
4c055ab5
GZ
1718void cpu_remove(CPUState *cpu)
1719{
1720 cpu->stop = true;
1721 cpu->unplug = true;
1722 qemu_cpu_kick(cpu);
1723}
1724
2c579042
BR
1725void cpu_remove_sync(CPUState *cpu)
1726{
1727 cpu_remove(cpu);
1728 while (cpu->created) {
1729 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1730 }
1731}
1732
4900116e
DDAG
1733/* For temporary buffers for forming a name */
1734#define VCPU_THREAD_NAME_SIZE 16
1735
e5ab30a2 1736static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1737{
4900116e 1738 char thread_name[VCPU_THREAD_NAME_SIZE];
37257942
AB
1739 static QemuCond *single_tcg_halt_cond;
1740 static QemuThread *single_tcg_cpu_thread;
e8feb96f
EC
1741 static int tcg_region_inited;
1742
1743 /*
1744 * Initialize TCG regions--once. Now is a good time, because:
1745 * (1) TCG's init context, prologue and target globals have been set up.
1746 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1747 * -accel flag is processed, so the check doesn't work then).
1748 */
1749 if (!tcg_region_inited) {
1750 tcg_region_inited = 1;
1751 tcg_region_init();
1752 }
4900116e 1753
37257942 1754 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
814e612e 1755 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1756 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1757 qemu_cond_init(cpu->halt_cond);
37257942
AB
1758
1759 if (qemu_tcg_mttcg_enabled()) {
1760 /* create a thread per vCPU with TCG (MTTCG) */
1761 parallel_cpus = true;
1762 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
4900116e 1763 cpu->cpu_index);
37257942
AB
1764
1765 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1766 cpu, QEMU_THREAD_JOINABLE);
1767
1768 } else {
1769 /* share a single thread for all cpus with TCG */
1770 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1771 qemu_thread_create(cpu->thread, thread_name,
1772 qemu_tcg_rr_cpu_thread_fn,
1773 cpu, QEMU_THREAD_JOINABLE);
1774
1775 single_tcg_halt_cond = cpu->halt_cond;
1776 single_tcg_cpu_thread = cpu->thread;
1777 }
1ecf47bf 1778#ifdef _WIN32
814e612e 1779 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1780#endif
61a46217 1781 while (!cpu->created) {
18a85728 1782 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1783 }
296af7c9 1784 } else {
37257942
AB
1785 /* For non-MTTCG cases we share the thread */
1786 cpu->thread = single_tcg_cpu_thread;
1787 cpu->halt_cond = single_tcg_halt_cond;
296af7c9
BS
1788 }
1789}
1790
b0cb0a66
VP
1791static void qemu_hax_start_vcpu(CPUState *cpu)
1792{
1793 char thread_name[VCPU_THREAD_NAME_SIZE];
1794
1795 cpu->thread = g_malloc0(sizeof(QemuThread));
1796 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1797 qemu_cond_init(cpu->halt_cond);
1798
1799 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1800 cpu->cpu_index);
1801 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1802 cpu, QEMU_THREAD_JOINABLE);
1803#ifdef _WIN32
1804 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1805#endif
1806 while (!cpu->created) {
1807 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1808 }
1809}
1810
48a106bd 1811static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1812{
4900116e
DDAG
1813 char thread_name[VCPU_THREAD_NAME_SIZE];
1814
814e612e 1815 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1816 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1817 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1818 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1819 cpu->cpu_index);
1820 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1821 cpu, QEMU_THREAD_JOINABLE);
61a46217 1822 while (!cpu->created) {
18a85728 1823 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1824 }
296af7c9
BS
1825}
1826
c97d6d2c
SAGDR
1827static void qemu_hvf_start_vcpu(CPUState *cpu)
1828{
1829 char thread_name[VCPU_THREAD_NAME_SIZE];
1830
1831 /* HVF currently does not support TCG, and only runs in
1832 * unrestricted-guest mode. */
1833 assert(hvf_enabled());
1834
1835 cpu->thread = g_malloc0(sizeof(QemuThread));
1836 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1837 qemu_cond_init(cpu->halt_cond);
1838
1839 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
1840 cpu->cpu_index);
1841 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
1842 cpu, QEMU_THREAD_JOINABLE);
1843 while (!cpu->created) {
1844 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1845 }
1846}
1847
10a9021d 1848static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1849{
4900116e
DDAG
1850 char thread_name[VCPU_THREAD_NAME_SIZE];
1851
814e612e 1852 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1853 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1854 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1855 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1856 cpu->cpu_index);
1857 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1858 QEMU_THREAD_JOINABLE);
61a46217 1859 while (!cpu->created) {
c7f0f3b1
AL
1860 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1861 }
1862}
1863
c643bed9 1864void qemu_init_vcpu(CPUState *cpu)
296af7c9 1865{
ce3960eb
AF
1866 cpu->nr_cores = smp_cores;
1867 cpu->nr_threads = smp_threads;
f324e766 1868 cpu->stopped = true;
56943e8c
PM
1869
1870 if (!cpu->as) {
1871 /* If the target cpu hasn't set up any address spaces itself,
1872 * give it the default one.
1873 */
12ebc9a7 1874 cpu->num_ases = 1;
80ceb07a 1875 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
56943e8c
PM
1876 }
1877
0ab07c62 1878 if (kvm_enabled()) {
48a106bd 1879 qemu_kvm_start_vcpu(cpu);
b0cb0a66
VP
1880 } else if (hax_enabled()) {
1881 qemu_hax_start_vcpu(cpu);
c97d6d2c
SAGDR
1882 } else if (hvf_enabled()) {
1883 qemu_hvf_start_vcpu(cpu);
c7f0f3b1 1884 } else if (tcg_enabled()) {
e5ab30a2 1885 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1886 } else {
10a9021d 1887 qemu_dummy_start_vcpu(cpu);
0ab07c62 1888 }
296af7c9
BS
1889}
1890
b4a3d965 1891void cpu_stop_current(void)
296af7c9 1892{
4917cf44 1893 if (current_cpu) {
ebd05fea 1894 qemu_cpu_stop(current_cpu, true);
b4a3d965 1895 }
296af7c9
BS
1896}
1897
56983463 1898int vm_stop(RunState state)
296af7c9 1899{
aa723c23 1900 if (qemu_in_vcpu_thread()) {
74892d24 1901 qemu_system_vmstop_request_prepare();
1dfb4dd9 1902 qemu_system_vmstop_request(state);
296af7c9
BS
1903 /*
1904 * FIXME: should not return to device code in case
1905 * vm_stop() has been requested.
1906 */
b4a3d965 1907 cpu_stop_current();
56983463 1908 return 0;
296af7c9 1909 }
56983463
KW
1910
1911 return do_vm_stop(state);
296af7c9
BS
1912}
1913
2d76e823
CI
1914/**
1915 * Prepare for (re)starting the VM.
1916 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1917 * running or in case of an error condition), 0 otherwise.
1918 */
1919int vm_prepare_start(void)
1920{
1921 RunState requested;
1922 int res = 0;
1923
1924 qemu_vmstop_requested(&requested);
1925 if (runstate_is_running() && requested == RUN_STATE__MAX) {
1926 return -1;
1927 }
1928
1929 /* Ensure that a STOP/RESUME pair of events is emitted if a
1930 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1931 * example, according to documentation is always followed by
1932 * the STOP event.
1933 */
1934 if (runstate_is_running()) {
1935 qapi_event_send_stop(&error_abort);
1936 res = -1;
1937 } else {
1938 replay_enable_events();
1939 cpu_enable_ticks();
1940 runstate_set(RUN_STATE_RUNNING);
1941 vm_state_notify(1, RUN_STATE_RUNNING);
1942 }
1943
1944 /* We are sending this now, but the CPUs will be resumed shortly later */
1945 qapi_event_send_resume(&error_abort);
1946 return res;
1947}
1948
/* Start the VM: resume all vCPUs when vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (vm_prepare_start() != 0) {
        return;
    }
    resume_all_vcpus();
}
1955
8a9236f1
LC
1956/* does a state transition even if the VM is already stopped,
1957 current state is forgotten forever */
56983463 1958int vm_stop_force_state(RunState state)
8a9236f1
LC
1959{
1960 if (runstate_is_running()) {
56983463 1961 return vm_stop(state);
8a9236f1
LC
1962 } else {
1963 runstate_set(state);
b2780d32
WC
1964
1965 bdrv_drain_all();
594a45ce
KW
1966 /* Make sure to return an error if the flush in a previous vm_stop()
1967 * failed. */
22af08ea 1968 return bdrv_flush_all();
8a9236f1
LC
1969 }
1970}
1971
9a78eead 1972void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1973{
1974 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1975#if defined(cpu_list)
1976 cpu_list(f, cpu_fprintf);
262353cb
BS
1977#endif
1978}
de0b36b6
LC
1979
1980CpuInfoList *qmp_query_cpus(Error **errp)
1981{
afed5a5a
IM
1982 MachineState *ms = MACHINE(qdev_get_machine());
1983 MachineClass *mc = MACHINE_GET_CLASS(ms);
de0b36b6 1984 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1985 CPUState *cpu;
de0b36b6 1986
bdc44640 1987 CPU_FOREACH(cpu) {
de0b36b6 1988 CpuInfoList *info;
182735ef
AF
1989#if defined(TARGET_I386)
1990 X86CPU *x86_cpu = X86_CPU(cpu);
1991 CPUX86State *env = &x86_cpu->env;
1992#elif defined(TARGET_PPC)
1993 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1994 CPUPPCState *env = &ppc_cpu->env;
1995#elif defined(TARGET_SPARC)
1996 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1997 CPUSPARCState *env = &sparc_cpu->env;
1998#elif defined(TARGET_MIPS)
1999 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2000 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
2001#elif defined(TARGET_TRICORE)
2002 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2003 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 2004#endif
de0b36b6 2005
cb446eca 2006 cpu_synchronize_state(cpu);
de0b36b6
LC
2007
2008 info = g_malloc0(sizeof(*info));
2009 info->value = g_malloc0(sizeof(*info->value));
55e5c285 2010 info->value->CPU = cpu->cpu_index;
182735ef 2011 info->value->current = (cpu == first_cpu);
259186a7 2012 info->value->halted = cpu->halted;
58f88d4b 2013 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
9f09e18a 2014 info->value->thread_id = cpu->thread_id;
de0b36b6 2015#if defined(TARGET_I386)
86f4b687 2016 info->value->arch = CPU_INFO_ARCH_X86;
544a3731 2017 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
de0b36b6 2018#elif defined(TARGET_PPC)
86f4b687 2019 info->value->arch = CPU_INFO_ARCH_PPC;
544a3731 2020 info->value->u.ppc.nip = env->nip;
de0b36b6 2021#elif defined(TARGET_SPARC)
86f4b687 2022 info->value->arch = CPU_INFO_ARCH_SPARC;
544a3731
EB
2023 info->value->u.q_sparc.pc = env->pc;
2024 info->value->u.q_sparc.npc = env->npc;
de0b36b6 2025#elif defined(TARGET_MIPS)
86f4b687 2026 info->value->arch = CPU_INFO_ARCH_MIPS;
544a3731 2027 info->value->u.q_mips.PC = env->active_tc.PC;
48e06fe0 2028#elif defined(TARGET_TRICORE)
86f4b687 2029 info->value->arch = CPU_INFO_ARCH_TRICORE;
544a3731 2030 info->value->u.tricore.PC = env->PC;
86f4b687
EB
2031#else
2032 info->value->arch = CPU_INFO_ARCH_OTHER;
de0b36b6 2033#endif
afed5a5a
IM
2034 info->value->has_props = !!mc->cpu_index_to_instance_props;
2035 if (info->value->has_props) {
2036 CpuInstanceProperties *props;
2037 props = g_malloc0(sizeof(*props));
2038 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2039 info->value->props = props;
2040 }
de0b36b6
LC
2041
2042 /* XXX: waiting for the qapi to support GSList */
2043 if (!cur_item) {
2044 head = cur_item = info;
2045 } else {
2046 cur_item->next = info;
2047 cur_item = info;
2048 }
2049 }
2050
2051 return head;
2052}
0cfd6a9a
LC
2053
2054void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2055 bool has_cpu, int64_t cpu_index, Error **errp)
2056{
2057 FILE *f;
2058 uint32_t l;
55e5c285 2059 CPUState *cpu;
0cfd6a9a 2060 uint8_t buf[1024];
0dc9daf0 2061 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
2062
2063 if (!has_cpu) {
2064 cpu_index = 0;
2065 }
2066
151d1322
AF
2067 cpu = qemu_get_cpu(cpu_index);
2068 if (cpu == NULL) {
c6bd8c70
MA
2069 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2070 "a CPU number");
0cfd6a9a
LC
2071 return;
2072 }
2073
2074 f = fopen(filename, "wb");
2075 if (!f) {
618da851 2076 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
2077 return;
2078 }
2079
2080 while (size != 0) {
2081 l = sizeof(buf);
2082 if (l > size)
2083 l = size;
2f4d0f59 2084 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
2085 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2086 " specified", orig_addr, orig_size);
2f4d0f59
AK
2087 goto exit;
2088 }
0cfd6a9a 2089 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 2090 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
2091 goto exit;
2092 }
2093 addr += l;
2094 size -= l;
2095 }
2096
2097exit:
2098 fclose(f);
2099}
6d3962bf
LC
2100
2101void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2102 Error **errp)
2103{
2104 FILE *f;
2105 uint32_t l;
2106 uint8_t buf[1024];
2107
2108 f = fopen(filename, "wb");
2109 if (!f) {
618da851 2110 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
2111 return;
2112 }
2113
2114 while (size != 0) {
2115 l = sizeof(buf);
2116 if (l > size)
2117 l = size;
eb6282f2 2118 cpu_physical_memory_read(addr, buf, l);
6d3962bf 2119 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 2120 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
2121 goto exit;
2122 }
2123 addr += l;
2124 size -= l;
2125 }
2126
2127exit:
2128 fclose(f);
2129}
ab49ab5c
LC
2130
2131void qmp_inject_nmi(Error **errp)
2132{
9cb805fd 2133 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c 2134}
27498bef
ST
2135
2136void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2137{
2138 if (!use_icount) {
2139 return;
2140 }
2141
2142 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2143 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2144 if (icount_align_option) {
2145 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2146 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2147 } else {
2148 cpu_fprintf(f, "Max guest delay NA\n");
2149 cpu_fprintf(f, "Max guest advance NA\n");
2150 }
2151}