/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "hw/boards.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *  - atomic instructions
 *  - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

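/* A host TCG backend can run a given guest only if every memory-ordering
 * guarantee the guest assumes (TCG_GUEST_DEFAULT_MO) is also provided by
 * the host (TCG_TARGET_DEFAULT_MO): masking the host's guarantees out of
 * the guest's requirements must leave no bit set.
 */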
static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}

static bool default_mttcg_enabled(void)
{
    if (use_icount || TCG_OVERSIZED_GUEST) {
        return false;
    } else {
#ifdef TARGET_SUPPORTS_MTTCG
        return check_tcg_memory_orders_compatible();
#else
        return false;
#endif
    }
}

void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > host's");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                error_report("Guest not yet converted to MTTCG - "
                             "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    error_report("Guest expects a stronger memory ordering "
                                 "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}

/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
}

/*
 * Update the global shared timers_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

#ifdef CONFIG_ATOMIC64
    atomic_set__nocheck(&timers_state.qemu_icount,
                        atomic_read__nocheck(&timers_state.qemu_icount) +
                        executed);
#else /* FIXME: we need 64bit atomics to do this safely */
    timers_state.qemu_icount += executed;
#endif
}

int64_t cpu_get_icount_raw(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount(cpu);
    }
#ifdef CONFIG_ATOMIC64
    return atomic_read__nocheck(&timers_state.qemu_icount);
#else /* FIXME: we need 64bit atomics to do this safely */
    return timers_state.qemu_icount;
#endif
}

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

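/* Reader side of the vm_clock seqlock: if a concurrent writer raced with
 * the read (the sequence count changed), simply retry; no lock is taken
 * on this hot path.
 */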
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

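/* Note: with icount_time_shift == N each instruction accounts for 2^N ns
 * of virtual time, i.e. the guest nominally executes 10^9 / 2^N
 * instructions per second: shift 3 is 125 MIPS, and MAX_ICOUNT_SHIFT (10)
 * is roughly 1 MIPS.
 */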
int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non increasing ticks may happen if the host uses
           software suspend */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing really protected by seqlock is cpu_clock_offset.  */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped.  You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing really protected by seqlock is cpu_clock_offset.  */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

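/* Note on the bias update above: changing icount_time_shift rescales the
 * instruction count, so qemu_icount_bias is recomputed to keep the
 * externally visible clock, bias + (qemu_icount << shift), continuous at
 * cur_icount.
 */
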
static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

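/* Round a nanosecond deadline up to a whole number of instructions:
 * e.g. with icount_time_shift == 3 (8 ns per instruction), a 100 ns
 * deadline becomes ceil(100 / 8) == 13 instructions.
 */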
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids the warps becoming visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

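/* Work out how long a vCPU must sleep per 10 ms timeslice of work: at 50%
 * throttle, throttle_ratio == 1 and the vCPU sleeps 10 ms for every 10 ms
 * it runs; at the 99% maximum it sleeps ~990 ms per timeslice.
 */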
static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

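/* icount has two modes: a fixed shift given on the command line
 * (use_icount == 1), and "auto" (use_icount == 2) where icount_time_shift
 * is retuned at runtime by icount_adjust() to track real time.
 */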
void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness ends.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

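/* Note: the kick below has to loop, because between reading
 * tcg_current_rr_cpu and calling cpu_exit() the round-robin scheduler
 * may already have moved on to another vCPU; retrying until the pointer
 * is stable guarantees the currently running vCPU really got kicked.
 */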
/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (!qemu_in_vcpu_thread() && first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}

static void start_tcg_kick_timer(void)
{
    if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    if (tcg_kick_vcpu_timer) {
        timer_del(tcg_kick_vcpu_timer);
        tcg_kick_vcpu_timer = NULL;
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

void cpu_synchronize_all_pre_loadvm(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_pre_loadvm(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

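/* run_on_cpu() queues func on the target vCPU's work list and blocks
 * until it has run; the BQL (passed in as the wait mutex) is dropped
 * while waiting so the vCPU thread can make progress.
 */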
void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
}

static bool qemu_tcg_should_sleep(CPUState *cpu)
{
    if (mttcg_enabled) {
        return cpu_thread_is_idle(cpu);
    } else {
        return all_cpu_threads_idle();
    }
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (qemu_tcg_should_sleep(cpu)) {
        stop_tcg_kick_timer();
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    qemu_wait_io_event_common(cpu);
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

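/* Note on the budget split below: a budget of e.g. 100000 instructions
 * puts 65535 (0xffff, all the 16-bit decrementer can hold) into
 * u16.low and the remaining 34465 into icount_extra;
 * cpu_get_icount_executed() reverses this split.
 */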
static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;
    }
}

static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();
    }
}


static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn. If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}

static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->halted = 0;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
#ifdef _WIN32
        SleepEx(0, TRUE);
#endif
        qemu_wait_io_event_common(cpu);
    }
    return NULL;
}

#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif

/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread. The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    g_assert(!use_icount);

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        if (cpu_can_run(cpu)) {
            int r;
            r = tcg_cpu_exec(cpu);
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times. If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start-up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
            default:
                /* Ignore everything else? */
                break;
            }
        } else if (cpu->unplug) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            qemu_mutex_unlock_iothread();
            return NULL;
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_tcg_wait_io_event(cpu);
    }

    return NULL;
}

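/* On POSIX hosts the kick is delivered as a SIG_IPI signal to the vCPU
 * thread; on Windows, QueueUserAPC() with a no-op APC merely forces the
 * thread out of any alertable wait (e.g. the SleepEx() in the HAX loop).
 */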
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
                    __func__, GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        cpu_exit(cpu);
        /* NOP unless doing single-thread RR */
        qemu_cpu_kick_rr_cpu();
    } else {
        if (hax_enabled()) {
            /*
             * FIXME: race condition with the exit_request check in
             * hax_vcpu_hax_exec
             */
            cpu->exit_request = 1;
        }
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

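/* Track BQL ownership in a thread-local flag rather than asking the mutex
 * itself, so qemu_mutex_iothread_locked() can report whether the *current*
 * thread holds qemu_global_mutex.
 */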
static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    g_assert(!qemu_mutex_iothread_locked());
    qemu_mutex_lock(&qemu_global_mutex);
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}

void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;
    static int tcg_region_inited;

    /*
     * Initialize TCG regions--once. Now is a good time, because:
     * (1) TCG's init context, prologue and target globals have been set up.
     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
     *     -accel flag is processed, so the check doesn't work then).
     */
    if (!tcg_region_inited) {
        tcg_region_inited = 1;
        tcg_region_init();
    }

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                     cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
    } else {
        /* For non-MTTCG cases we share the thread */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
    }
}

static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = g_new0(AddressSpace, 1);

        address_space_init(as, cpu->memory, "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/**
 * Prepare for (re)starting the VM.
 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
 * running or in case of an error condition), 0 otherwise.
 */
int vm_prepare_start(void)
{
    RunState requested;
    int res = 0;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending.  The BLOCK_IO_ERROR event, for
     * example, according to documentation is always followed by
     * the STOP event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop(&error_abort);
        res = -1;
    } else {
        replay_enable_events();
        cpu_enable_ticks();
        runstate_set(RUN_STATE_RUNNING);
        vm_state_notify(1, RUN_STATE_RUNNING);
    }

    /* We are sending this now, but the CPUs will be resumed shortly later */
    qapi_event_send_resume(&error_abort);
    return res;
}

void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif
        info->value->has_props = !!mc->cpu_index_to_instance_props;
        if (info->value->has_props) {
            CpuInstanceProperties *props;
            props = g_malloc0(sizeof(*props));
            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
            info->value->props = props;
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay     NA\n");
        cpu_fprintf(f, "Max guest advance   NA\n");
    }
}