/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "hw/boards.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *  - atomic instructions
 *  - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 *  - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 *  - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}

static bool default_mttcg_enabled(void)
{
    if (use_icount || TCG_OVERSIZED_GUEST) {
        return false;
    } else {
#ifdef TARGET_SUPPORTS_MTTCG
        return check_tcg_memory_orders_compatible();
#else
        return false;
#endif
    }
}

void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > host's");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                error_report("Guest not yet converted to MTTCG - "
                             "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    error_report("Guest expects a stronger memory ordering "
                                 "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}

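/*
 * Usage note: only the "thread" suboption with values "multi" or
 * "single" is shown by the code above; on the command line this is
 * typically reached via something like "-accel tcg,thread=multi" (the
 * accelerator spelling itself is an assumption, not shown here).
 */
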
/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

#ifdef CONFIG_ATOMIC64
    atomic_set__nocheck(&timers_state.qemu_icount,
                        atomic_read__nocheck(&timers_state.qemu_icount) +
                        executed);
#else /* FIXME: we need 64bit atomics to do this safely */
    timers_state.qemu_icount += executed;
#endif
}

int64_t cpu_get_icount_raw(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount(cpu);
    }
#ifdef CONFIG_ATOMIC64
    return atomic_read__nocheck(&timers_state.qemu_icount);
#else /* FIXME: we need 64bit atomics to do this safely */
    return timers_state.qemu_icount;
#endif
}

/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

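/*
 * Worked example: with icount_time_shift == 3 every emulated
 * instruction accounts for 2^3 = 8 ns of virtual time, i.e. a
 * 125 MIPS guest, which is also the initial guess configure_icount()
 * makes below for shift=auto.
 */
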
/* Return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non increasing ticks may happen if the host uses
           software suspend */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

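/*
 * Reader-side pattern for vm_clock_seqlock: cpu_get_icount() and
 * cpu_get_clock() above retry their read section until
 * seqlock_read_retry() reports that no writer interleaved, so they can
 * run outside the BQL while writers (which do hold the BQL) bracket
 * their updates with seqlock_write_begin()/seqlock_write_end().
 */
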
/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex. */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
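
/*
 * Note: the bias is recomputed above so that
 * qemu_icount_bias + (qemu_icount << icount_time_shift) still equals
 * cur_icount after a shift change; QEMU_CLOCK_VIRTUAL therefore never
 * jumps when the conversion factor is adjusted.
 */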

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
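
/*
 * e.g. with icount_time_shift == 3, qemu_icount_round(20) is
 * (20 + 7) >> 3 == 3: a 20 ns deadline budgets three whole
 * instructions, rounding up rather than truncating.
 */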

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

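/*
 * Throttle math, worked through: at 50% throttle pct = 0.5, so
 * throttle_ratio = 0.5 / 0.5 = 1.0 and a vCPU sleeps one full 10 ms
 * CPU_THROTTLE_TIMESLICE_NS per timeslice it runs; at the 99% maximum
 * the ratio is 99, i.e. roughly 990 ms of sleep per 10 ms of run time.
 */
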
static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

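/*
 * Usage note: the options parsed above are the -icount suboptions,
 * e.g. "-icount shift=7,align=on" or "-icount shift=auto,sleep=off"
 * (suboption names taken from the qemu_opt_get() calls above; the
 * incompatible combinations are rejected with the errors shown).
 */
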
/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

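/*
 * Note on qemu_cpu_kick_rr_cpu() below: it re-reads tcg_current_rr_cpu
 * after calling cpu_exit() and retries until the two reads agree, so a
 * kick cannot be lost to a concurrent switch to the next vCPU in the
 * round-robin schedule.
 */
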
/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (!qemu_in_vcpu_thread() && first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}

static void start_tcg_kick_timer(void)
{
    if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    if (tcg_kick_vcpu_timer) {
        timer_del(tcg_kick_vcpu_timer);
        tcg_kick_vcpu_timer = NULL;
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread. */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread. */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
}

static bool qemu_tcg_should_sleep(CPUState *cpu)
{
    if (mttcg_enabled) {
        return cpu_thread_is_idle(cpu);
    } else {
        return all_cpu_threads_idle();
    }
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (qemu_tcg_should_sleep(cpu)) {
        stop_tcg_kick_timer();
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    qemu_wait_io_event_common(cpu);
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts. */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;
    }
}

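/*
 * e.g. if tcg_get_icount_limit() grants a budget of 100000 instructions,
 * the split above leaves icount_decr.u16.low = 0xffff (65535) and
 * icount_extra = 34465; generated code only decrements u16.low, and the
 * execution loop refills it from icount_extra as it runs dry.
 */
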
static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn. If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay. */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}

static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->halted = 0;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
#ifdef _WIN32
        SleepEx(0, TRUE);
#endif
        qemu_wait_io_event_common(cpu);
    }
    return NULL;
}

#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif

/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread. The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    g_assert(!use_icount);

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        if (cpu_can_run(cpu)) {
            int r;
            r = tcg_cpu_exec(cpu);
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times. If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start-up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
                /* fall through */
            default:
                /* Ignore everything else? */
                break;
            }
        } else if (cpu->unplug) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            qemu_mutex_unlock_iothread();
            return NULL;
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_tcg_wait_io_event(cpu);
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
                    __func__, GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        cpu_exit(cpu);
        /* NOP unless doing single-thread RR */
        qemu_cpu_kick_rr_cpu();
    } else {
        if (hax_enabled()) {
            /*
             * FIXME: race condition with the exit_request check in
             * hax_vcpu_hax_exec
             */
            cpu->exit_request = 1;
        }
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    g_assert(!qemu_mutex_iothread_locked());
    qemu_mutex_lock(&qemu_global_mutex);
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}

void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                     cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
    } else {
        /* For non-MTTCG cases we share the thread */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
    }
}

static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/**
 * Prepare for (re)starting the VM.
 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
 * running or in case of an error condition), 0 otherwise.
 */
int vm_prepare_start(void)
{
    RunState requested;
    int res = 0;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending.  The BLOCK_IO_ERROR event, for
     * example, according to documentation is always followed by
     * the STOP event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop(&error_abort);
        res = -1;
    } else {
        replay_enable_events();
        cpu_enable_ticks();
        runstate_set(RUN_STATE_RUNNING);
        vm_state_notify(1, RUN_STATE_RUNNING);
    }

    /* We are sending this now, but the CPUs will be resumed shortly later */
    qapi_event_send_resume(&error_abort);
    return res;
}

void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}

1842
8a9236f1
LC
1843/* does a state transition even if the VM is already stopped,
1844 current state is forgotten forever */
56983463 1845int vm_stop_force_state(RunState state)
8a9236f1
LC
1846{
1847 if (runstate_is_running()) {
56983463 1848 return vm_stop(state);
8a9236f1
LC
1849 } else {
1850 runstate_set(state);
b2780d32
WC
1851
1852 bdrv_drain_all();
594a45ce
KW
1853 /* Make sure to return an error if the flush in a previous vm_stop()
1854 * failed. */
22af08ea 1855 return bdrv_flush_all();
8a9236f1
LC
1856 }
1857}
1858
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif
        info->value->has_props = !!mc->cpu_index_to_instance_props;
        if (info->value->has_props) {
            CpuInstanceProperties *props;
            props = g_malloc0(sizeof(*props));
            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
            info->value->props = props;
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

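/*
 * QMP usage sketch (argument names assumed from the usual memsave
 * schema, where "val" carries the start address handled as 'addr'
 * above):
 *
 *   { "execute": "memsave",
 *     "arguments": { "val": 4096, "size": 100,
 *                    "filename": "/tmp/virtual-mem-dump" } }
 */
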
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}