/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
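
/* Illustrative arithmetic: one emulated instruction accounts for
 * 2^icount_time_shift ns of virtual time, so shift == 10 gives 1024 ns
 * per instruction, roughly the 1 MIPS floor above, while the initial
 * shift of 3 chosen in configure_icount() gives 8 ns per instruction,
 * i.e. about 125 MIPS.
 */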

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 * - atomic instructions
 * - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}
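
/* For illustration: the check masks the guest's required memory-order
 * bits against what the host TCG backend guarantees. A weakly-ordered
 * guest (TCG_GUEST_DEFAULT_MO == 0) passes on any host, while a
 * strongly-ordered guest such as x86 on a weakly-ordered host such as
 * ARM leaves bits set and fails the check.
 */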

static bool default_mttcg_enabled(void)
{
    if (use_icount || TCG_OVERSIZED_GUEST) {
        return false;
    } else {
#ifdef TARGET_SUPPORTS_MTTCG
        return check_tcg_memory_orders_compatible();
#else
        return false;
#endif
    }
}

void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > host's");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                error_report("Guest not yet converted to MTTCG - "
                             "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    error_report("Guest expects a stronger memory ordering "
                                 "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}

/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
}
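
/* Worked example (illustrative numbers): with icount_budget = 10000,
 * icount_decr.u16.low = 100 and icount_extra = 400, 500 instructions
 * remain unexecuted, so this returns 10000 - 500 = 9500.
 */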

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

#ifdef CONFIG_ATOMIC64
    atomic_set__nocheck(&timers_state.qemu_icount,
                        atomic_read__nocheck(&timers_state.qemu_icount) +
                        executed);
#else /* FIXME: we need 64bit atomics to do this safely */
    timers_state.qemu_icount += executed;
#endif
}

int64_t cpu_get_icount_raw(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount(cpu);
    }
#ifdef CONFIG_ATOMIC64
    return atomic_read__nocheck(&timers_state.qemu_icount);
#else /* FIXME: we need 64bit atomics to do this safely */
    return timers_state.qemu_icount;
#endif
}

/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

/* return the time elapsed in VM between vm_start and vm_stop. Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non increasing ticks may happen if the host uses
           software suspend */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}
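
/* Illustration of the clamp above: if cpu_ticks_prev was 1000 and a host
 * suspend/resume makes the new reading 900, the offset is bumped by 100
 * so the returned value stays at 1000 and the clock never runs backwards.
 */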

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex. */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
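
/* Feedback sketch (illustrative): delta > 0 means the virtual clock has
 * run ahead of real time, so decrementing icount_time_shift halves the
 * nanoseconds charged per instruction and virtual time advances more
 * slowly; delta < 0 does the opposite. The bias is then recomputed so
 * that cpu_get_icount_locked() stays continuous across the shift change.
 */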

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
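
/* qemu_icount_round() converts a nanosecond deadline into an instruction
 * budget, rounding up. E.g. (illustrative) with icount_time_shift == 3
 * (8 ns/insn), a 20 ns deadline yields (20 + 7) >> 3 = 3 instructions.
 */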

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This avoids that the warps are visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
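
/* Warp example (illustrative): with all vCPUs idle and the next
 * QEMU_CLOCK_VIRTUAL timer 5 ms away, sleep mode arms icount_warp_timer
 * 5 ms of real time ahead; when it fires, icount_warp_rt() folds the
 * elapsed real time into qemu_icount_bias so guest time catches up
 * without the vCPU executing instructions.
 */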

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}
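
/* Duty-cycle example (illustrative): at 50% throttle, pct = 0.5, so each
 * vCPU sleeps throttle_ratio * 10 ms = 10 ms and the timer re-arms
 * 10 ms / (1 - 0.5) = 20 ms out, giving roughly 10 ms of run time per
 * 20 ms period. At 99% the sleep grows to 990 ms per 10 ms of run time.
 */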

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}
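
/* The re-read loop above guards against the race where the round-robin
 * scheduler moves on to another vCPU between the atomic_mb_read() and
 * the cpu_exit(): we keep kicking until the vCPU we kicked is still the
 * current one.
 */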

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (!qemu_in_vcpu_thread() && first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false. This way,
         * handle_icount_deadline can run.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}

static void start_tcg_kick_timer(void)
{
    if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    if (tcg_kick_vcpu_timer) {
        timer_del(tcg_kick_vcpu_timer);
        tcg_kick_vcpu_timer = NULL;
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread. */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread. */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
}

static bool qemu_tcg_should_sleep(CPUState *cpu)
{
    if (mttcg_enabled) {
        return cpu_thread_is_idle(cpu);
    } else {
        return all_cpu_threads_idle();
    }
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (qemu_tcg_should_sleep(cpu)) {
        stop_tcg_kick_timer();
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    qemu_wait_io_event_common(cpu);
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}
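
/* Illustration: with no pending QEMU_CLOCK_VIRTUAL timer the deadline is
 * clamped to INT32_MAX ns; with a timer 1000 ns away and
 * icount_time_shift == 3 the budget is qemu_icount_round(1000) = 125
 * instructions.
 */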

static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts. */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;
    }
}
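
/* Budget split example (illustrative): a budget of 200000 instructions
 * puts 0xffff (65535) in the 16-bit decrementer u16.low and the
 * remaining 134465 in icount_extra, to be refilled as the decrementer
 * drains.
 */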

static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();
    }
}


static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn. If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
        qemu_account_warp_timer();

        /* Run the timers here. This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay. */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}

static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->halted = 0;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
#ifdef _WIN32
        SleepEx(0, TRUE);
#endif
        qemu_wait_io_event_common(cpu);
    }
    return NULL;
}

#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif

/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread. The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    g_assert(!use_icount);

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        if (cpu_can_run(cpu)) {
            int r;
            r = tcg_cpu_exec(cpu);
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times. If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start-up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
            default:
                /* Ignore everything else? */
                break;
            }
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_tcg_wait_io_event(cpu);
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
                    __func__, GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        cpu_exit(cpu);
        /* NOP unless doing single-thread RR */
        qemu_cpu_kick_rr_cpu();
    } else {
        if (hax_enabled()) {
            /*
             * FIXME: race condition with the exit_request check in
             * hax_vcpu_hax_exec
             */
            cpu->exit_request = 1;
        }
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    g_assert(!qemu_mutex_iothread_locked());
    qemu_mutex_lock(&qemu_global_mutex);
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}

void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                     cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
    } else {
        /* For non-MTTCG cases we share the thread */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
    }
}

static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/**
 * Prepare for (re)starting the VM.
 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
 * running or in case of an error condition), 0 otherwise.
 */
int vm_prepare_start(void)
{
    RunState requested;
    int res = 0;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending. The BLOCK_IO_ERROR event, for
     * example, according to documentation is always followed by
     * the STOP event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop(&error_abort);
        res = -1;
    } else {
        replay_enable_events();
        cpu_enable_ticks();
        runstate_set(RUN_STATE_RUNNING);
        vm_state_notify(1, RUN_STATE_RUNNING);
    }

    /* We are sending this now, but the CPUs will be resumed shortly later */
    qapi_event_send_resume(&error_abort);
    return res;
}

void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");
    }
}