/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG.  Otherwise, once a
 * guest (target) has been updated to support:
 *  - atomic instructions
 *  - memory ordering primitives (barriers)
 * it can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check:
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements, which would likely slow things
 * down a lot.
 */

static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}

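/* Illustration with hypothetical values: a guest front end whose
 * TCG_GUEST_DEFAULT_MO demands full sequential consistency is only
 * compatible with a host backend whose TCG_TARGET_DEFAULT_MO provides
 * at least those ordering guarantees; any guest ordering bit the host
 * lacks leaves the expression above non-zero, the check fails, and
 * MTTCG stays off by default.
 */
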
static bool default_mttcg_enabled(void)
{
    if (use_icount || TCG_OVERSIZED_GUEST) {
        return false;
    } else {
#ifdef TARGET_SUPPORTS_MTTCG
        return check_tcg_memory_orders_compatible();
#else
        return false;
#endif
    }
}

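/* qemu_tcg_configure() below consumes the "thread" suboption of the TCG
 * accelerator.  For example (illustrative command line only):
 *
 *     qemu-system-x86_64 -accel tcg,thread=multi
 *
 * requests one host thread per vCPU, subject to the checks above;
 * thread=single forces the round-robin model.
 */
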
void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > hosts");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                error_report("Guest not yet converted to MTTCG - "
                             "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    error_report("Guest expects a stronger memory ordering "
                                 "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}

/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
}
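
/* For instance, with a hypothetical budget of 70000 instructions split
 * into u16.low = 65535 and icount_extra = 4465, a read taken when
 * u16.low has counted down to 100 yields
 * 70000 - (100 + 4465) = 65435 instructions executed so far.
 */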

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        /* Take into account what has run */
        icount += cpu_get_icount_executed(cpu);
    }
    return icount;
}

/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}
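
/* e.g. with icount_time_shift == 3 every instruction accounts for
 * 2^3 = 8 ns of virtual time, i.e. a nominal 125 MIPS guest.
 */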

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped.  You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the real thing protected by seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}
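
/* To illustrate with made-up numbers: if virtual time has run 300 ms
 * ahead of real time, delta is +300 ms.  Once delta * 2 exceeds
 * last_delta by more than ICOUNT_WOBBLE (100 ms), icount_time_shift is
 * decremented, halving the nanoseconds charged per instruction, and
 * the bias is recomputed so the visible clock does not jump.
 */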

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
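
/* qemu_icount_round() converts nanoseconds up to whole instructions,
 * e.g. with icount_time_shift == 3 a 100 ns deadline becomes
 * (100 + 7) >> 3 = 13 instructions.
 */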

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * This is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
584
e76d1798
PD
585static void qemu_account_warp_timer(void)
586{
587 if (!use_icount || !icount_sleep) {
588 return;
589 }
590
591 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
592 * do not fire, so computing the deadline does not make sense.
593 */
594 if (!runstate_is_running()) {
595 return;
596 }
597
598 /* warp clock deterministically in record/replay mode */
599 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
600 return;
601 }
602
603 timer_del(icount_warp_timer);
604 icount_warp_rt();
605}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
}
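
/* As a worked example: at a 50% throttle, pct = 0.5 and
 * throttle_ratio = 1, so the vCPU sleeps one 10 ms timeslice for every
 * 10 ms it runs; at 75%, throttle_ratio = 3 and it sleeps 30 ms per
 * 10 ms of run time.
 */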

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}
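
/* Typical (hypothetical) caller usage, e.g. from migration
 * auto-converge:
 *
 *     cpu_throttle_set(20);            <- clamped into [1, 99]
 *     ...
 *     if (cpu_throttle_active()) {
 *         cpu_throttle_stop();
 *     }
 */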

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}
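
/* These settings arrive via the -icount command line option, e.g.
 * (example invocations only):
 *
 *     -icount shift=7,sleep=on        fixed cost of 2^7 ns per insn
 *     -icount shift=auto,align=on     adaptive shift, use_icount == 2
 */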

/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU.  If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed while all vCPUs are idle and restarted again
 * once any of them becomes runnable.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (!qemu_in_vcpu_thread() && first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}

static void start_tcg_kick_timer(void)
{
    if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    if (tcg_kick_vcpu_timer) {
        timer_del(tcg_kick_vcpu_timer);
        tcg_kick_vcpu_timer = NULL;
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
}

static bool qemu_tcg_should_sleep(CPUState *cpu)
{
    if (mttcg_enabled) {
        return cpu_thread_is_idle(cpu);
    } else {
        return all_cpu_threads_idle();
    }
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (qemu_tcg_should_sleep(cpu)) {
        stop_tcg_kick_timer();
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    qemu_wait_io_event_common(cpu);
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int64_t count;
        int decr;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution.  However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        count = tcg_get_icount_limit();

        /* To calculate what we have executed so far we need to know
         * what we originally budgeted to run this cycle */
        cpu->icount_budget = count;

        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
}
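
/* Continuing the earlier example: a hypothetical limit of 70000
 * instructions is split into u16.low = 0xffff (65535), the most the
 * 16-bit decrementer can hold, with the remaining 4465 parked in
 * icount_extra until the translated code reloads the decrementer.
 */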

static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        timers_state.qemu_icount += cpu_get_icount_executed(cpu);

        /* Reset the counters */
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();
    }
}


static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn.  If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
        deal_with_unplugged_cpus();
    }

    return NULL;
}

static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->halted = 0;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
#ifdef _WIN32
        SleepEx(0, TRUE);
#endif
        qemu_wait_io_event_common(cpu);
    }
    return NULL;
}

#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif

/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread.  The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    g_assert(!use_icount);

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        if (cpu_can_run(cpu)) {
            int r;
            r = tcg_cpu_exec(cpu);
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times.  If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start-up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
                /* fall through */
            default:
                /* Ignore everything else? */
                break;
            }
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_tcg_wait_io_event(cpu);
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
                    __func__, GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (tcg_enabled()) {
        cpu_exit(cpu);
        /* NOP unless doing single-thread RR */
        qemu_cpu_kick_rr_cpu();
    } else {
        if (hax_enabled()) {
            /*
             * FIXME: race condition with the exit_request check in
             * hax_vcpu_hax_exec
             */
            cpu->exit_request = 1;
        }
        qemu_cpu_kick_thread(cpu);
    }
}

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

void qemu_mutex_lock_iothread(void)
{
    g_assert(!qemu_mutex_iothread_locked());
    qemu_mutex_lock(&qemu_global_mutex);
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
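
/* The canonical pattern around a blocking operation in a vCPU thread
 * is (sketch only, do_something_slow() is hypothetical):
 *
 *     qemu_mutex_unlock_iothread();
 *     do_something_slow();
 *     qemu_mutex_lock_iothread();
 */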

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

void cpu_remove(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
}

void cpu_remove_sync(CPUState *cpu)
{
    cpu_remove(cpu);
    while (cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                     cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
    } else {
        /* For non-MTTCG cases we share the thread */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
    }
}

static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        AddressSpace *as = address_space_init_shareable(cpu->memory,
                                                        "cpu-memory");
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, as, 0);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_broadcast(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/**
 * Prepare for (re)starting the VM.
 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
 * running or in case of an error condition), 0 otherwise.
 */
int vm_prepare_start(void)
{
    RunState requested;
    int res = 0;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending.  The BLOCK_IO_ERROR event, for
     * example, according to documentation is always followed by
     * the STOP event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop(&error_abort);
        res = -1;
    } else {
        replay_enable_events();
        cpu_enable_ticks();
        runstate_set(RUN_STATE_RUNNING);
        vm_state_notify(1, RUN_STATE_RUNNING);
    }

    /* We are sending this now, but the CPUs will be resumed shortly later */
    qapi_event_send_resume(&error_abort);
    return res;
}

void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->arch = CPU_INFO_ARCH_X86;
        info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->arch = CPU_INFO_ARCH_PPC;
        info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->arch = CPU_INFO_ARCH_SPARC;
        info->value->u.q_sparc.pc = env->pc;
        info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->arch = CPU_INFO_ARCH_MIPS;
        info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->arch = CPU_INFO_ARCH_TRICORE;
        info->value->u.tricore.PC = env->PC;
#else
        info->value->arch = CPU_INFO_ARCH_OTHER;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
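
/* qmp_memsave() and qmp_pmemsave() above back the QMP memsave and
 * pmemsave commands; a request might look like (illustrative only):
 *
 *     { "execute": "memsave",
 *       "arguments": { "val": 4096, "size": 16384,
 *                      "filename": "/tmp/guest-mem.bin" } }
 */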

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
{
    if (!use_icount) {
        return;
    }

    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
    } else {
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");
    }
}