]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
cpus: move icount preparation out of tcg_exec_cpu
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
7b31bbc2 26#include "qemu/osdep.h"
33c11879 27#include "qemu-common.h"
8d4e9146 28#include "qemu/config-file.h"
33c11879 29#include "cpu.h"
83c9089e 30#include "monitor/monitor.h"
a4e15de9 31#include "qapi/qmp/qerror.h"
d49b6836 32#include "qemu/error-report.h"
9c17d615 33#include "sysemu/sysemu.h"
da31d594 34#include "sysemu/block-backend.h"
022c62cb 35#include "exec/gdbstub.h"
9c17d615 36#include "sysemu/dma.h"
b3946626 37#include "sysemu/hw_accel.h"
9c17d615 38#include "sysemu/kvm.h"
b0cb0a66 39#include "sysemu/hax.h"
de0b36b6 40#include "qmp-commands.h"
63c91552 41#include "exec/exec-all.h"
296af7c9 42
1de7afc9 43#include "qemu/thread.h"
9c17d615
PB
44#include "sysemu/cpus.h"
45#include "sysemu/qtest.h"
1de7afc9
PB
46#include "qemu/main-loop.h"
47#include "qemu/bitmap.h"
cb365646 48#include "qemu/seqlock.h"
8d4e9146 49#include "tcg.h"
a4e15de9 50#include "qapi-event.h"
9cb805fd 51#include "hw/nmi.h"
8b427044 52#include "sysemu/replay.h"
0ff0fc19 53
6d9cb73c
JK
54#ifdef CONFIG_LINUX
55
56#include <sys/prctl.h>
57
c0532a76
MT
58#ifndef PR_MCE_KILL
59#define PR_MCE_KILL 33
60#endif
61
6d9cb73c
JK
62#ifndef PR_MCE_KILL_SET
63#define PR_MCE_KILL_SET 1
64#endif
65
66#ifndef PR_MCE_KILL_EARLY
67#define PR_MCE_KILL_EARLY 1
68#endif
69
70#endif /* CONFIG_LINUX */
71
27498bef
ST
72int64_t max_delay;
73int64_t max_advance;
296af7c9 74
2adcc85d
JH
75/* vcpu throttling controls */
76static QEMUTimer *throttle_timer;
77static unsigned int throttle_percentage;
78
79#define CPU_THROTTLE_PCT_MIN 1
80#define CPU_THROTTLE_PCT_MAX 99
81#define CPU_THROTTLE_TIMESLICE_NS 10000000
82
321bc0b2
TC
83bool cpu_is_stopped(CPUState *cpu)
84{
85 return cpu->stopped || !runstate_is_running();
86}
87
a98ae1d8 88static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 89{
c64ca814 90 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
91 return false;
92 }
321bc0b2 93 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
94 return true;
95 }
8c2e1b00 96 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 97 kvm_halt_in_kernel()) {
ac873f1e
PM
98 return false;
99 }
100 return true;
101}
102
103static bool all_cpu_threads_idle(void)
104{
182735ef 105 CPUState *cpu;
ac873f1e 106
bdc44640 107 CPU_FOREACH(cpu) {
182735ef 108 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
109 return false;
110 }
111 }
112 return true;
113}
114
946fb27c
PB
115/***********************************************************/
116/* guest cycle counter */
117
a3270e19
PB
118/* Protected by TimersState seqlock */
119
5045e9d9 120static bool icount_sleep = true;
71468395 121static int64_t vm_clock_warp_start = -1;
946fb27c
PB
122/* Conversion factor from emulated instructions to virtual clock ticks. */
123static int icount_time_shift;
124/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125#define MAX_ICOUNT_SHIFT 10
a3270e19 126
946fb27c
PB
127static QEMUTimer *icount_rt_timer;
128static QEMUTimer *icount_vm_timer;
129static QEMUTimer *icount_warp_timer;
946fb27c
PB
130
131typedef struct TimersState {
cb365646 132 /* Protected by BQL. */
946fb27c
PB
133 int64_t cpu_ticks_prev;
134 int64_t cpu_ticks_offset;
cb365646
LPF
135
136 /* cpu_clock_offset can be read out of BQL, so protect it with
137 * this lock.
138 */
139 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
140 int64_t cpu_clock_offset;
141 int32_t cpu_ticks_enabled;
142 int64_t dummy;
c96778bb
FK
143
144 /* Compensate for varying guest execution speed. */
145 int64_t qemu_icount_bias;
146 /* Only written by TCG thread */
147 int64_t qemu_icount;
946fb27c
PB
148} TimersState;
149
d9cd4007 150static TimersState timers_state;
8d4e9146
FK
151bool mttcg_enabled;
152
153/*
154 * We default to false if we know other options have been enabled
155 * which are currently incompatible with MTTCG. Otherwise when each
156 * guest (target) has been updated to support:
157 * - atomic instructions
158 * - memory ordering primitives (barriers)
159 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
160 *
161 * Once a guest architecture has been converted to the new primitives
162 * there are two remaining limitations to check.
163 *
164 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
165 * - The host must have a stronger memory order than the guest
166 *
167 * It may be possible in future to support strong guests on weak hosts
168 * but that will require tagging all load/stores in a guest with their
169 * implicit memory order requirements which would likely slow things
170 * down a lot.
171 */
172
/* Check that the guest's default memory ordering requirements are a subset
 * of what the host TCG target provides.  When either
 * TCG_GUEST_DEFAULT_MO or TCG_TARGET_DEFAULT_MO is not defined the answer
 * is conservatively "not compatible".
 */
static bool check_tcg_memory_orders_compatible(void)
{
    bool compatible = false;

#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    /* No ordering bit may be required by the guest but missing on the host. */
    compatible = !(TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO);
#endif
    return compatible;
}
181
182static bool default_mttcg_enabled(void)
183{
83fd9629 184 if (use_icount || TCG_OVERSIZED_GUEST) {
8d4e9146
FK
185 return false;
186 } else {
187#ifdef TARGET_SUPPORTS_MTTCG
188 return check_tcg_memory_orders_compatible();
189#else
190 return false;
191#endif
192 }
193}
194
195void qemu_tcg_configure(QemuOpts *opts, Error **errp)
196{
197 const char *t = qemu_opt_get(opts, "thread");
198 if (t) {
199 if (strcmp(t, "multi") == 0) {
200 if (TCG_OVERSIZED_GUEST) {
201 error_setg(errp, "No MTTCG when guest word size > hosts");
83fd9629
AB
202 } else if (use_icount) {
203 error_setg(errp, "No MTTCG when icount is enabled");
8d4e9146 204 } else {
86953503 205#ifndef TARGET_SUPPORTS_MTTCG
c34c7620
AB
206 error_report("Guest not yet converted to MTTCG - "
207 "you may get unexpected results");
208#endif
8d4e9146
FK
209 if (!check_tcg_memory_orders_compatible()) {
210 error_report("Guest expects a stronger memory ordering "
211 "than the host provides");
8cfef892 212 error_printf("This may cause strange/hard to debug errors\n");
8d4e9146
FK
213 }
214 mttcg_enabled = true;
215 }
216 } else if (strcmp(t, "single") == 0) {
217 mttcg_enabled = false;
218 } else {
219 error_setg(errp, "Invalid 'thread' setting %s", t);
220 }
221 } else {
222 mttcg_enabled = default_mttcg_enabled();
223 }
224}
946fb27c 225
2a62914b 226int64_t cpu_get_icount_raw(void)
946fb27c
PB
227{
228 int64_t icount;
4917cf44 229 CPUState *cpu = current_cpu;
946fb27c 230
c96778bb 231 icount = timers_state.qemu_icount;
243c5f77 232 if (cpu && cpu->running) {
414b15c9 233 if (!cpu->can_do_io) {
2a62914b
PD
234 fprintf(stderr, "Bad icount read\n");
235 exit(1);
946fb27c 236 }
28ecfd7a 237 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 238 }
2a62914b
PD
239 return icount;
240}
241
242/* Return the virtual CPU time, based on the instruction counter. */
243static int64_t cpu_get_icount_locked(void)
244{
245 int64_t icount = cpu_get_icount_raw();
3f031313 246 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
247}
248
17a15f1b
PB
249int64_t cpu_get_icount(void)
250{
251 int64_t icount;
252 unsigned start;
253
254 do {
255 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
256 icount = cpu_get_icount_locked();
257 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
258
259 return icount;
260}
261
3f031313
FK
262int64_t cpu_icount_to_ns(int64_t icount)
263{
264 return icount << icount_time_shift;
265}
266
d90f3cca
C
267/* return the time elapsed in VM between vm_start and vm_stop. Unless
268 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
269 * counter.
270 *
271 * Caller must hold the BQL
272 */
946fb27c
PB
273int64_t cpu_get_ticks(void)
274{
5f3e3101
PB
275 int64_t ticks;
276
946fb27c
PB
277 if (use_icount) {
278 return cpu_get_icount();
279 }
5f3e3101
PB
280
281 ticks = timers_state.cpu_ticks_offset;
282 if (timers_state.cpu_ticks_enabled) {
4a7428c5 283 ticks += cpu_get_host_ticks();
5f3e3101
PB
284 }
285
286 if (timers_state.cpu_ticks_prev > ticks) {
287 /* Note: non increasing ticks may happen if the host uses
288 software suspend */
289 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
290 ticks = timers_state.cpu_ticks_prev;
946fb27c 291 }
5f3e3101
PB
292
293 timers_state.cpu_ticks_prev = ticks;
294 return ticks;
946fb27c
PB
295}
296
cb365646 297static int64_t cpu_get_clock_locked(void)
946fb27c 298{
1d45cea5 299 int64_t time;
cb365646 300
1d45cea5 301 time = timers_state.cpu_clock_offset;
5f3e3101 302 if (timers_state.cpu_ticks_enabled) {
1d45cea5 303 time += get_clock();
946fb27c 304 }
cb365646 305
1d45cea5 306 return time;
cb365646
LPF
307}
308
d90f3cca 309/* Return the monotonic time elapsed in VM, i.e.,
8212ff86
PM
310 * the time between vm_start and vm_stop
311 */
cb365646
LPF
312int64_t cpu_get_clock(void)
313{
314 int64_t ti;
315 unsigned start;
316
317 do {
318 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
319 ti = cpu_get_clock_locked();
320 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
321
322 return ti;
946fb27c
PB
323}
324
cb365646 325/* enable cpu_get_ticks()
3224e878 326 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 327 */
946fb27c
PB
328void cpu_enable_ticks(void)
329{
cb365646 330 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 331 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 332 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 333 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
334 timers_state.cpu_clock_offset -= get_clock();
335 timers_state.cpu_ticks_enabled = 1;
336 }
03719e44 337 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
338}
339
340/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646 341 * cpu_get_ticks() after that.
3224e878 342 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 343 */
946fb27c
PB
344void cpu_disable_ticks(void)
345{
cb365646 346 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 347 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 348 if (timers_state.cpu_ticks_enabled) {
4a7428c5 349 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 350 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
351 timers_state.cpu_ticks_enabled = 0;
352 }
03719e44 353 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
354}
355
356/* Correlation between real and virtual time is always going to be
357 fairly approximate, so ignore small variation.
358 When the guest is idle real and virtual time will be aligned in
359 the IO wait loop. */
73bcb24d 360#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
361
362static void icount_adjust(void)
363{
364 int64_t cur_time;
365 int64_t cur_icount;
366 int64_t delta;
a3270e19
PB
367
368 /* Protected by TimersState mutex. */
946fb27c 369 static int64_t last_delta;
468cc7cf 370
946fb27c
PB
371 /* If the VM is not running, then do nothing. */
372 if (!runstate_is_running()) {
373 return;
374 }
468cc7cf 375
03719e44 376 seqlock_write_begin(&timers_state.vm_clock_seqlock);
17a15f1b
PB
377 cur_time = cpu_get_clock_locked();
378 cur_icount = cpu_get_icount_locked();
468cc7cf 379
946fb27c
PB
380 delta = cur_icount - cur_time;
381 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
382 if (delta > 0
383 && last_delta + ICOUNT_WOBBLE < delta * 2
384 && icount_time_shift > 0) {
385 /* The guest is getting too far ahead. Slow time down. */
386 icount_time_shift--;
387 }
388 if (delta < 0
389 && last_delta - ICOUNT_WOBBLE > delta * 2
390 && icount_time_shift < MAX_ICOUNT_SHIFT) {
391 /* The guest is getting too far behind. Speed time up. */
392 icount_time_shift++;
393 }
394 last_delta = delta;
c96778bb
FK
395 timers_state.qemu_icount_bias = cur_icount
396 - (timers_state.qemu_icount << icount_time_shift);
03719e44 397 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
398}
399
400static void icount_adjust_rt(void *opaque)
401{
40daca54 402 timer_mod(icount_rt_timer,
1979b908 403 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
404 icount_adjust();
405}
406
407static void icount_adjust_vm(void *opaque)
408{
40daca54
AB
409 timer_mod(icount_vm_timer,
410 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 411 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
412 icount_adjust();
413}
414
415static int64_t qemu_icount_round(int64_t count)
416{
417 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
418}
419
efab87cf 420static void icount_warp_rt(void)
946fb27c 421{
ccffff48
AB
422 unsigned seq;
423 int64_t warp_start;
424
17a15f1b
PB
425 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
426 * changes from -1 to another value, so the race here is okay.
427 */
ccffff48
AB
428 do {
429 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
430 warp_start = vm_clock_warp_start;
431 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
432
433 if (warp_start == -1) {
946fb27c
PB
434 return;
435 }
436
03719e44 437 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 438 if (runstate_is_running()) {
8eda206e
PD
439 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
440 cpu_get_clock_locked());
8ed961d9
PB
441 int64_t warp_delta;
442
443 warp_delta = clock - vm_clock_warp_start;
444 if (use_icount == 2) {
946fb27c 445 /*
40daca54 446 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
447 * far ahead of real time.
448 */
17a15f1b 449 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 450 int64_t delta = clock - cur_icount;
8ed961d9 451 warp_delta = MIN(warp_delta, delta);
946fb27c 452 }
c96778bb 453 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
454 }
455 vm_clock_warp_start = -1;
03719e44 456 seqlock_write_end(&timers_state.vm_clock_seqlock);
8ed961d9
PB
457
458 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
459 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
460 }
946fb27c
PB
461}
462
/* Callback of icount_warp_timer; @opaque is unused.
 *
 * No replay checkpoint is needed here because the timer already
 * synchronizes with CHECKPOINT_CLOCK_VIRTUAL_RT.
 */
static void icount_timer_cb(void *opaque)
{
    icount_warp_rt();
}
470
8156be56
PB
471void qtest_clock_warp(int64_t dest)
472{
40daca54 473 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 474 AioContext *aio_context;
8156be56 475 assert(qtest_enabled());
efef88b3 476 aio_context = qemu_get_aio_context();
8156be56 477 while (clock < dest) {
40daca54 478 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 479 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 480
03719e44 481 seqlock_write_begin(&timers_state.vm_clock_seqlock);
c96778bb 482 timers_state.qemu_icount_bias += warp;
03719e44 483 seqlock_write_end(&timers_state.vm_clock_seqlock);
17a15f1b 484
40daca54 485 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 486 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 487 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 488 }
40daca54 489 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
490}
491
e76d1798 492void qemu_start_warp_timer(void)
946fb27c 493{
ce78d18c 494 int64_t clock;
946fb27c
PB
495 int64_t deadline;
496
e76d1798 497 if (!use_icount) {
946fb27c
PB
498 return;
499 }
500
8bd7f71d
PD
501 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
502 * do not fire, so computing the deadline does not make sense.
503 */
504 if (!runstate_is_running()) {
505 return;
506 }
507
508 /* warp clock deterministically in record/replay mode */
e76d1798 509 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
8bd7f71d
PD
510 return;
511 }
512
ce78d18c 513 if (!all_cpu_threads_idle()) {
946fb27c
PB
514 return;
515 }
516
8156be56
PB
517 if (qtest_enabled()) {
518 /* When testing, qtest commands advance icount. */
e76d1798 519 return;
8156be56
PB
520 }
521
ac70aafc 522 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 523 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 524 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 525 if (deadline < 0) {
d7a0f71d
VC
526 static bool notified;
527 if (!icount_sleep && !notified) {
528 error_report("WARNING: icount sleep disabled and no active timers");
529 notified = true;
530 }
ce78d18c 531 return;
ac70aafc
AB
532 }
533
946fb27c
PB
534 if (deadline > 0) {
535 /*
40daca54 536 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
537 * sleep. Otherwise, the CPU might be waiting for a future timer
538 * interrupt to wake it up, but the interrupt never comes because
539 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 540 * QEMU_CLOCK_VIRTUAL.
946fb27c 541 */
5045e9d9
VC
542 if (!icount_sleep) {
543 /*
544 * We never let VCPUs sleep in no sleep icount mode.
545 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
546 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
547 * It is useful when we want a deterministic execution time,
548 * isolated from host latencies.
549 */
03719e44 550 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9 551 timers_state.qemu_icount_bias += deadline;
03719e44 552 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9
VC
553 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
554 } else {
555 /*
556 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
557 * "real" time, (related to the time left until the next event) has
558 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
559 * This avoids that the warps are visible externally; for example,
560 * you will not be sending network packets continuously instead of
561 * every 100ms.
562 */
03719e44 563 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9
VC
564 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
565 vm_clock_warp_start = clock;
566 }
03719e44 567 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9 568 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 569 }
ac70aafc 570 } else if (deadline == 0) {
40daca54 571 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
572 }
573}
574
e76d1798
PD
575static void qemu_account_warp_timer(void)
576{
577 if (!use_icount || !icount_sleep) {
578 return;
579 }
580
581 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
582 * do not fire, so computing the deadline does not make sense.
583 */
584 if (!runstate_is_running()) {
585 return;
586 }
587
588 /* warp clock deterministically in record/replay mode */
589 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
590 return;
591 }
592
593 timer_del(icount_warp_timer);
594 icount_warp_rt();
595}
596
d09eae37
FK
597static bool icount_state_needed(void *opaque)
598{
599 return use_icount;
600}
601
602/*
603 * This is a subsection for icount migration.
604 */
605static const VMStateDescription icount_vmstate_timers = {
606 .name = "timer/icount",
607 .version_id = 1,
608 .minimum_version_id = 1,
5cd8cada 609 .needed = icount_state_needed,
d09eae37
FK
610 .fields = (VMStateField[]) {
611 VMSTATE_INT64(qemu_icount_bias, TimersState),
612 VMSTATE_INT64(qemu_icount, TimersState),
613 VMSTATE_END_OF_LIST()
614 }
615};
616
946fb27c
PB
617static const VMStateDescription vmstate_timers = {
618 .name = "timer",
619 .version_id = 2,
620 .minimum_version_id = 1,
35d08458 621 .fields = (VMStateField[]) {
946fb27c
PB
622 VMSTATE_INT64(cpu_ticks_offset, TimersState),
623 VMSTATE_INT64(dummy, TimersState),
624 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
625 VMSTATE_END_OF_LIST()
d09eae37 626 },
5cd8cada
JQ
627 .subsections = (const VMStateDescription*[]) {
628 &icount_vmstate_timers,
629 NULL
946fb27c
PB
630 }
631};
632
14e6fe12 633static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
2adcc85d 634{
2adcc85d
JH
635 double pct;
636 double throttle_ratio;
637 long sleeptime_ns;
638
639 if (!cpu_throttle_get_percentage()) {
640 return;
641 }
642
643 pct = (double)cpu_throttle_get_percentage()/100;
644 throttle_ratio = pct / (1 - pct);
645 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
646
647 qemu_mutex_unlock_iothread();
648 atomic_set(&cpu->throttle_thread_scheduled, 0);
649 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
650 qemu_mutex_lock_iothread();
651}
652
653static void cpu_throttle_timer_tick(void *opaque)
654{
655 CPUState *cpu;
656 double pct;
657
658 /* Stop the timer if needed */
659 if (!cpu_throttle_get_percentage()) {
660 return;
661 }
662 CPU_FOREACH(cpu) {
663 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
14e6fe12
PB
664 async_run_on_cpu(cpu, cpu_throttle_thread,
665 RUN_ON_CPU_NULL);
2adcc85d
JH
666 }
667 }
668
669 pct = (double)cpu_throttle_get_percentage()/100;
670 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
671 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
672}
673
674void cpu_throttle_set(int new_throttle_pct)
675{
676 /* Ensure throttle percentage is within valid range */
677 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
678 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
679
680 atomic_set(&throttle_percentage, new_throttle_pct);
681
682 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
683 CPU_THROTTLE_TIMESLICE_NS);
684}
685
686void cpu_throttle_stop(void)
687{
688 atomic_set(&throttle_percentage, 0);
689}
690
/* Return true while a non-zero throttle percentage is configured. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
695
696int cpu_throttle_get_percentage(void)
697{
698 return atomic_read(&throttle_percentage);
699}
700
4603ea01
PD
701void cpu_ticks_init(void)
702{
ccdb3c1f 703 seqlock_init(&timers_state.vm_clock_seqlock);
4603ea01 704 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
705 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
706 cpu_throttle_timer_tick, NULL);
4603ea01
PD
707}
708
1ad9580b 709void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 710{
1ad9580b 711 const char *option;
a8bfac37 712 char *rem_str = NULL;
1ad9580b 713
1ad9580b 714 option = qemu_opt_get(opts, "shift");
946fb27c 715 if (!option) {
a8bfac37
ST
716 if (qemu_opt_get(opts, "align") != NULL) {
717 error_setg(errp, "Please specify shift option when using align");
718 }
946fb27c
PB
719 return;
720 }
f1f4b57e
VC
721
722 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
723 if (icount_sleep) {
724 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
e76d1798 725 icount_timer_cb, NULL);
5045e9d9 726 }
f1f4b57e 727
a8bfac37 728 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
729
730 if (icount_align_option && !icount_sleep) {
778d9f9b 731 error_setg(errp, "align=on and sleep=off are incompatible");
f1f4b57e 732 }
946fb27c 733 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
734 errno = 0;
735 icount_time_shift = strtol(option, &rem_str, 0);
736 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
737 error_setg(errp, "icount: Invalid shift value");
738 }
946fb27c
PB
739 use_icount = 1;
740 return;
a8bfac37
ST
741 } else if (icount_align_option) {
742 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e 743 } else if (!icount_sleep) {
778d9f9b 744 error_setg(errp, "shift=auto and sleep=off are incompatible");
946fb27c
PB
745 }
746
747 use_icount = 2;
748
749 /* 125MIPS seems a reasonable initial guess at the guest speed.
750 It will be corrected fairly quickly anyway. */
751 icount_time_shift = 3;
752
753 /* Have both realtime and virtual time triggers for speed adjustment.
754 The realtime trigger catches emulated time passing too slowly,
755 the virtual time trigger catches emulated time passing too fast.
756 Realtime triggers occur even when idle, so use them less frequently
757 than VM triggers. */
bf2a7ddb
PD
758 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
759 icount_adjust_rt, NULL);
40daca54 760 timer_mod(icount_rt_timer,
bf2a7ddb 761 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
762 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
763 icount_adjust_vm, NULL);
764 timer_mod(icount_vm_timer,
765 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 766 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
767}
768
6546706d
AB
769/***********************************************************/
770/* TCG vCPU kick timer
771 *
772 * The kick timer is responsible for moving single threaded vCPU
773 * emulation on to the next vCPU. If more than one vCPU is running a
774 * timer event will force a cpu->exit so the next vCPU can get
775 * scheduled.
776 *
777 * The timer is removed if all vCPUs are idle and restarted again once
778 * idleness is complete.
779 */
780
781static QEMUTimer *tcg_kick_vcpu_timer;
791158d9 782static CPUState *tcg_current_rr_cpu;
6546706d
AB
783
784#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
785
786static inline int64_t qemu_tcg_next_kick(void)
787{
788 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
789}
790
791158d9
AB
791/* Kick the currently round-robin scheduled vCPU */
792static void qemu_cpu_kick_rr_cpu(void)
793{
794 CPUState *cpu;
791158d9
AB
795 do {
796 cpu = atomic_mb_read(&tcg_current_rr_cpu);
797 if (cpu) {
798 cpu_exit(cpu);
799 }
800 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
801}
802
6b8f0187
PB
803static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
804{
805}
806
3f53bc61
PB
807void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
808{
6b8f0187
PB
809 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
810 qemu_notify_event();
811 return;
812 }
813
814 if (!qemu_in_vcpu_thread() && first_cpu) {
815 /* qemu_cpu_kick is not enough to kick a halted CPU out of
816 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
817 * causes cpu_thread_is_idle to return false. This way,
818 * handle_icount_deadline can run.
819 */
820 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
821 }
3f53bc61
PB
822}
823
6546706d
AB
824static void kick_tcg_thread(void *opaque)
825{
826 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
791158d9 827 qemu_cpu_kick_rr_cpu();
6546706d
AB
828}
829
830static void start_tcg_kick_timer(void)
831{
37257942 832 if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
6546706d
AB
833 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
834 kick_tcg_thread, NULL);
835 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
836 }
837}
838
839static void stop_tcg_kick_timer(void)
840{
841 if (tcg_kick_vcpu_timer) {
842 timer_del(tcg_kick_vcpu_timer);
843 tcg_kick_vcpu_timer = NULL;
844 }
845}
846
296af7c9
BS
847/***********************************************************/
848void hw_error(const char *fmt, ...)
849{
850 va_list ap;
55e5c285 851 CPUState *cpu;
296af7c9
BS
852
853 va_start(ap, fmt);
854 fprintf(stderr, "qemu: hardware error: ");
855 vfprintf(stderr, fmt, ap);
856 fprintf(stderr, "\n");
bdc44640 857 CPU_FOREACH(cpu) {
55e5c285 858 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 859 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
860 }
861 va_end(ap);
862 abort();
863}
864
865void cpu_synchronize_all_states(void)
866{
182735ef 867 CPUState *cpu;
296af7c9 868
bdc44640 869 CPU_FOREACH(cpu) {
182735ef 870 cpu_synchronize_state(cpu);
296af7c9
BS
871 }
872}
873
874void cpu_synchronize_all_post_reset(void)
875{
182735ef 876 CPUState *cpu;
296af7c9 877
bdc44640 878 CPU_FOREACH(cpu) {
182735ef 879 cpu_synchronize_post_reset(cpu);
296af7c9
BS
880 }
881}
882
883void cpu_synchronize_all_post_init(void)
884{
182735ef 885 CPUState *cpu;
296af7c9 886
bdc44640 887 CPU_FOREACH(cpu) {
182735ef 888 cpu_synchronize_post_init(cpu);
296af7c9
BS
889 }
890}
891
56983463 892static int do_vm_stop(RunState state)
296af7c9 893{
56983463
KW
894 int ret = 0;
895
1354869c 896 if (runstate_is_running()) {
296af7c9 897 cpu_disable_ticks();
296af7c9 898 pause_all_vcpus();
f5bbfba1 899 runstate_set(state);
1dfb4dd9 900 vm_state_notify(0, state);
a4e15de9 901 qapi_event_send_stop(&error_abort);
296af7c9 902 }
56983463 903
594a45ce 904 bdrv_drain_all();
6d0ceb80 905 replay_disable_events();
22af08ea 906 ret = bdrv_flush_all();
594a45ce 907
56983463 908 return ret;
296af7c9
BS
909}
910
a1fcaa73 911static bool cpu_can_run(CPUState *cpu)
296af7c9 912{
4fdeee7c 913 if (cpu->stop) {
a1fcaa73 914 return false;
0ab07c62 915 }
321bc0b2 916 if (cpu_is_stopped(cpu)) {
a1fcaa73 917 return false;
0ab07c62 918 }
a1fcaa73 919 return true;
296af7c9
BS
920}
921
91325046 922static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 923{
64f6b346 924 gdb_set_stop_cpu(cpu);
8cf71710 925 qemu_system_debug_request();
f324e766 926 cpu->stopped = true;
3c638d06
JK
927}
928
6d9cb73c
JK
929#ifdef CONFIG_LINUX
930static void sigbus_reraise(void)
931{
932 sigset_t set;
933 struct sigaction action;
934
935 memset(&action, 0, sizeof(action));
936 action.sa_handler = SIG_DFL;
937 if (!sigaction(SIGBUS, &action, NULL)) {
938 raise(SIGBUS);
939 sigemptyset(&set);
940 sigaddset(&set, SIGBUS);
a2d1761d 941 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
6d9cb73c
JK
942 }
943 perror("Failed to re-raise SIGBUS!\n");
944 abort();
945}
946
d98d4072 947static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
6d9cb73c 948{
a16fc07e
PB
949 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
950 sigbus_reraise();
951 }
952
2ae41db2
PB
953 if (current_cpu) {
954 /* Called asynchronously in VCPU thread. */
955 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
956 sigbus_reraise();
957 }
958 } else {
959 /* Called synchronously (via signalfd) in main thread. */
960 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
961 sigbus_reraise();
962 }
6d9cb73c
JK
963 }
964}
965
966static void qemu_init_sigbus(void)
967{
968 struct sigaction action;
969
970 memset(&action, 0, sizeof(action));
971 action.sa_flags = SA_SIGINFO;
d98d4072 972 action.sa_sigaction = sigbus_handler;
6d9cb73c
JK
973 sigaction(SIGBUS, &action, NULL);
974
975 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
976}
6d9cb73c 977#else /* !CONFIG_LINUX */
6d9cb73c
JK
/* No SIGBUS setup is done on hosts other than Linux. */
static void qemu_init_sigbus(void)
{
}
a16fc07e 981#endif /* !CONFIG_LINUX */
ff48eb5f 982
b2532d88 983static QemuMutex qemu_global_mutex;
296af7c9
BS
984
985static QemuThread io_thread;
986
296af7c9
BS
987/* cpu creation */
988static QemuCond qemu_cpu_cond;
989/* system init */
296af7c9
BS
990static QemuCond qemu_pause_cond;
991
d3b12f5d 992void qemu_init_cpu_loop(void)
296af7c9 993{
6d9cb73c 994 qemu_init_sigbus();
ed94592b 995 qemu_cond_init(&qemu_cpu_cond);
ed94592b 996 qemu_cond_init(&qemu_pause_cond);
296af7c9 997 qemu_mutex_init(&qemu_global_mutex);
296af7c9 998
b7680cb6 999 qemu_thread_get_self(&io_thread);
296af7c9
BS
1000}
1001
14e6fe12 1002void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
e82bcec2 1003{
d148d90e 1004 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
3c02270d
CV
1005}
1006
4c055ab5
GZ
1007static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1008{
1009 if (kvm_destroy_vcpu(cpu) < 0) {
1010 error_report("kvm_destroy_vcpu failed");
1011 exit(EXIT_FAILURE);
1012 }
1013}
1014
1015static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1016{
1017}
1018
509a0d78 1019static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 1020{
37257942 1021 atomic_mb_set(&cpu->thread_kicked, false);
4fdeee7c
AF
1022 if (cpu->stop) {
1023 cpu->stop = false;
f324e766 1024 cpu->stopped = true;
96bce683 1025 qemu_cond_broadcast(&qemu_pause_cond);
296af7c9 1026 }
a5403c69 1027 process_queued_cpu_work(cpu);
37257942
AB
1028}
1029
1030static bool qemu_tcg_should_sleep(CPUState *cpu)
1031{
1032 if (mttcg_enabled) {
1033 return cpu_thread_is_idle(cpu);
1034 } else {
1035 return all_cpu_threads_idle();
1036 }
296af7c9
BS
1037}
1038
d5f8d613 1039static void qemu_tcg_wait_io_event(CPUState *cpu)
296af7c9 1040{
37257942 1041 while (qemu_tcg_should_sleep(cpu)) {
6546706d 1042 stop_tcg_kick_timer();
d5f8d613 1043 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1044 }
296af7c9 1045
6546706d
AB
1046 start_tcg_kick_timer();
1047
37257942 1048 qemu_wait_io_event_common(cpu);
296af7c9
BS
1049}
1050
fd529e8f 1051static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 1052{
a98ae1d8 1053 while (cpu_thread_is_idle(cpu)) {
f5c121b8 1054 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1055 }
296af7c9 1056
509a0d78 1057 qemu_wait_io_event_common(cpu);
296af7c9
BS
1058}
1059
7e97cd88 1060static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 1061{
48a106bd 1062 CPUState *cpu = arg;
84b4915d 1063 int r;
296af7c9 1064
ab28bd23
PB
1065 rcu_register_thread();
1066
2e7f7a3c 1067 qemu_mutex_lock_iothread();
814e612e 1068 qemu_thread_get_self(cpu->thread);
9f09e18a 1069 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1070 cpu->can_do_io = 1;
4917cf44 1071 current_cpu = cpu;
296af7c9 1072
504134d2 1073 r = kvm_init_vcpu(cpu);
84b4915d
JK
1074 if (r < 0) {
1075 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1076 exit(1);
1077 }
296af7c9 1078
18268b60 1079 kvm_init_cpu_signals(cpu);
296af7c9
BS
1080
1081 /* signal CPU creation */
61a46217 1082 cpu->created = true;
296af7c9
BS
1083 qemu_cond_signal(&qemu_cpu_cond);
1084
4c055ab5 1085 do {
a1fcaa73 1086 if (cpu_can_run(cpu)) {
1458c363 1087 r = kvm_cpu_exec(cpu);
83f338f7 1088 if (r == EXCP_DEBUG) {
91325046 1089 cpu_handle_guest_debug(cpu);
83f338f7 1090 }
0ab07c62 1091 }
fd529e8f 1092 qemu_kvm_wait_io_event(cpu);
4c055ab5 1093 } while (!cpu->unplug || cpu_can_run(cpu));
296af7c9 1094
4c055ab5 1095 qemu_kvm_destroy_vcpu(cpu);
2c579042
BR
1096 cpu->created = false;
1097 qemu_cond_signal(&qemu_cpu_cond);
4c055ab5 1098 qemu_mutex_unlock_iothread();
296af7c9
BS
1099 return NULL;
1100}
1101
c7f0f3b1
AL
1102static void *qemu_dummy_cpu_thread_fn(void *arg)
1103{
1104#ifdef _WIN32
1105 fprintf(stderr, "qtest is not supported under Windows\n");
1106 exit(1);
1107#else
10a9021d 1108 CPUState *cpu = arg;
c7f0f3b1
AL
1109 sigset_t waitset;
1110 int r;
1111
ab28bd23
PB
1112 rcu_register_thread();
1113
c7f0f3b1 1114 qemu_mutex_lock_iothread();
814e612e 1115 qemu_thread_get_self(cpu->thread);
9f09e18a 1116 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1117 cpu->can_do_io = 1;
37257942 1118 current_cpu = cpu;
c7f0f3b1
AL
1119
1120 sigemptyset(&waitset);
1121 sigaddset(&waitset, SIG_IPI);
1122
1123 /* signal CPU creation */
61a46217 1124 cpu->created = true;
c7f0f3b1
AL
1125 qemu_cond_signal(&qemu_cpu_cond);
1126
c7f0f3b1 1127 while (1) {
c7f0f3b1
AL
1128 qemu_mutex_unlock_iothread();
1129 do {
1130 int sig;
1131 r = sigwait(&waitset, &sig);
1132 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1133 if (r == -1) {
1134 perror("sigwait");
1135 exit(1);
1136 }
1137 qemu_mutex_lock_iothread();
509a0d78 1138 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1139 }
1140
1141 return NULL;
1142#endif
1143}
1144
1be7fcb8
AB
1145static int64_t tcg_get_icount_limit(void)
1146{
1147 int64_t deadline;
1148
1149 if (replay_mode != REPLAY_MODE_PLAY) {
1150 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1151
1152 /* Maintain prior (possibly buggy) behaviour where if no deadline
1153 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1154 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1155 * nanoseconds.
1156 */
1157 if ((deadline < 0) || (deadline > INT32_MAX)) {
1158 deadline = INT32_MAX;
1159 }
1160
1161 return qemu_icount_round(deadline);
1162 } else {
1163 return replay_get_instructions();
1164 }
1165}
1166
12e9700d
AB
1167static void handle_icount_deadline(void)
1168{
6b8f0187 1169 assert(qemu_in_vcpu_thread());
12e9700d
AB
1170 if (use_icount) {
1171 int64_t deadline =
1172 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1173
1174 if (deadline == 0) {
6b8f0187 1175 /* Wake up other AioContexts. */
12e9700d 1176 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
6b8f0187 1177 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
12e9700d
AB
1178 }
1179 }
1180}
1181
05248382 1182static void prepare_icount_for_run(CPUState *cpu)
1be7fcb8 1183{
1be7fcb8
AB
1184 if (use_icount) {
1185 int64_t count;
1186 int decr;
05248382
AB
1187
1188 /* These should always be cleared by process_icount_data after
1189 * each vCPU execution. However u16.high can be raised
1190 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1191 */
1192 g_assert(cpu->icount_decr.u16.low == 0);
1193 g_assert(cpu->icount_extra == 0);
1194
1195
1be7fcb8 1196 count = tcg_get_icount_limit();
05248382 1197
1be7fcb8
AB
1198 timers_state.qemu_icount += count;
1199 decr = (count > 0xffff) ? 0xffff : count;
1200 count -= decr;
1201 cpu->icount_decr.u16.low = decr;
1202 cpu->icount_extra = count;
1203 }
05248382
AB
1204}
1205
1206static void process_icount_data(CPUState *cpu)
1207{
1be7fcb8
AB
1208 if (use_icount) {
1209 /* Fold pending instructions back into the
1210 instruction counter, and clear the interrupt flag. */
1211 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1212 + cpu->icount_extra);
05248382
AB
1213
1214 /* Reset the counters */
1215 cpu->icount_decr.u16.low = 0;
1be7fcb8
AB
1216 cpu->icount_extra = 0;
1217 replay_account_executed_instructions();
1218 }
05248382
AB
1219}
1220
1221
1222static int tcg_cpu_exec(CPUState *cpu)
1223{
1224 int ret;
1225#ifdef CONFIG_PROFILER
1226 int64_t ti;
1227#endif
1228
1229#ifdef CONFIG_PROFILER
1230 ti = profile_getclock();
1231#endif
1232 qemu_mutex_unlock_iothread();
1233 cpu_exec_start(cpu);
1234 ret = cpu_exec(cpu);
1235 cpu_exec_end(cpu);
1236 qemu_mutex_lock_iothread();
1237#ifdef CONFIG_PROFILER
1238 tcg_time += profile_getclock() - ti;
1239#endif
1be7fcb8
AB
1240 return ret;
1241}
1242
c93bbbef
AB
1243/* Destroy any remaining vCPUs which have been unplugged and have
1244 * finished running
1245 */
1246static void deal_with_unplugged_cpus(void)
1be7fcb8 1247{
c93bbbef 1248 CPUState *cpu;
1be7fcb8 1249
c93bbbef
AB
1250 CPU_FOREACH(cpu) {
1251 if (cpu->unplug && !cpu_can_run(cpu)) {
1252 qemu_tcg_destroy_vcpu(cpu);
1253 cpu->created = false;
1254 qemu_cond_signal(&qemu_cpu_cond);
1be7fcb8
AB
1255 break;
1256 }
1257 }
1be7fcb8 1258}
bdb7ca67 1259
6546706d
AB
1260/* Single-threaded TCG
1261 *
1262 * In the single-threaded case each vCPU is simulated in turn. If
1263 * there is more than a single vCPU we create a simple timer to kick
1264 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1265 * This is done explicitly rather than relying on side-effects
1266 * elsewhere.
1267 */
1268
37257942 1269static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
296af7c9 1270{
c3586ba7 1271 CPUState *cpu = arg;
296af7c9 1272
ab28bd23
PB
1273 rcu_register_thread();
1274
2e7f7a3c 1275 qemu_mutex_lock_iothread();
814e612e 1276 qemu_thread_get_self(cpu->thread);
296af7c9 1277
38fcbd3f
AF
1278 CPU_FOREACH(cpu) {
1279 cpu->thread_id = qemu_get_thread_id();
1280 cpu->created = true;
626cf8f4 1281 cpu->can_do_io = 1;
38fcbd3f 1282 }
296af7c9
BS
1283 qemu_cond_signal(&qemu_cpu_cond);
1284
fa7d1867 1285 /* wait for initial kick-off after machine start */
c28e399c 1286 while (first_cpu->stopped) {
d5f8d613 1287 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1288
1289 /* process any pending work */
bdc44640 1290 CPU_FOREACH(cpu) {
37257942 1291 current_cpu = cpu;
182735ef 1292 qemu_wait_io_event_common(cpu);
8e564b4e 1293 }
0ab07c62 1294 }
296af7c9 1295
6546706d
AB
1296 start_tcg_kick_timer();
1297
c93bbbef
AB
1298 cpu = first_cpu;
1299
e5143e30
AB
1300 /* process any pending work */
1301 cpu->exit_request = 1;
1302
296af7c9 1303 while (1) {
c93bbbef
AB
1304 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1305 qemu_account_warp_timer();
1306
6b8f0187
PB
1307 /* Run the timers here. This is much more efficient than
1308 * waking up the I/O thread and waiting for completion.
1309 */
1310 handle_icount_deadline();
1311
c93bbbef
AB
1312 if (!cpu) {
1313 cpu = first_cpu;
1314 }
1315
e5143e30
AB
1316 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1317
791158d9 1318 atomic_mb_set(&tcg_current_rr_cpu, cpu);
37257942 1319 current_cpu = cpu;
c93bbbef
AB
1320
1321 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1322 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1323
1324 if (cpu_can_run(cpu)) {
1325 int r;
05248382
AB
1326
1327 prepare_icount_for_run(cpu);
1328
c93bbbef 1329 r = tcg_cpu_exec(cpu);
05248382
AB
1330
1331 process_icount_data(cpu);
1332
c93bbbef
AB
1333 if (r == EXCP_DEBUG) {
1334 cpu_handle_guest_debug(cpu);
1335 break;
08e73c48
PK
1336 } else if (r == EXCP_ATOMIC) {
1337 qemu_mutex_unlock_iothread();
1338 cpu_exec_step_atomic(cpu);
1339 qemu_mutex_lock_iothread();
1340 break;
c93bbbef 1341 }
37257942 1342 } else if (cpu->stop) {
c93bbbef
AB
1343 if (cpu->unplug) {
1344 cpu = CPU_NEXT(cpu);
1345 }
1346 break;
1347 }
1348
e5143e30
AB
1349 cpu = CPU_NEXT(cpu);
1350 } /* while (cpu && !cpu->exit_request).. */
1351
791158d9
AB
1352 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1353 atomic_set(&tcg_current_rr_cpu, NULL);
c93bbbef 1354
e5143e30
AB
1355 if (cpu && cpu->exit_request) {
1356 atomic_mb_set(&cpu->exit_request, 0);
1357 }
ac70aafc 1358
37257942 1359 qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
c93bbbef 1360 deal_with_unplugged_cpus();
296af7c9
BS
1361 }
1362
1363 return NULL;
1364}
1365
b0cb0a66
VP
1366static void *qemu_hax_cpu_thread_fn(void *arg)
1367{
1368 CPUState *cpu = arg;
1369 int r;
b3d3a426
VP
1370
1371 qemu_mutex_lock_iothread();
b0cb0a66 1372 qemu_thread_get_self(cpu->thread);
b0cb0a66
VP
1373
1374 cpu->thread_id = qemu_get_thread_id();
1375 cpu->created = true;
1376 cpu->halted = 0;
1377 current_cpu = cpu;
1378
1379 hax_init_vcpu(cpu);
1380 qemu_cond_signal(&qemu_cpu_cond);
1381
1382 while (1) {
1383 if (cpu_can_run(cpu)) {
1384 r = hax_smp_cpu_exec(cpu);
1385 if (r == EXCP_DEBUG) {
1386 cpu_handle_guest_debug(cpu);
1387 }
1388 }
1389
1390 while (cpu_thread_is_idle(cpu)) {
1391 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1392 }
1393#ifdef _WIN32
1394 SleepEx(0, TRUE);
1395#endif
1396 qemu_wait_io_event_common(cpu);
1397 }
1398 return NULL;
1399}
1400
#ifdef _WIN32
/* No-op APC routine queued by qemu_cpu_kick_thread() via QueueUserAPC(). */
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
    /* intentionally empty */
}
#endif
1406
37257942
AB
1407/* Multi-threaded TCG
1408 *
1409 * In the multi-threaded case each vCPU has its own thread. The TLS
1410 * variable current_cpu can be used deep in the code to find the
1411 * current CPUState for a given thread.
1412 */
1413
1414static void *qemu_tcg_cpu_thread_fn(void *arg)
1415{
1416 CPUState *cpu = arg;
1417
bf51c720
AB
1418 g_assert(!use_icount);
1419
37257942
AB
1420 rcu_register_thread();
1421
1422 qemu_mutex_lock_iothread();
1423 qemu_thread_get_self(cpu->thread);
1424
1425 cpu->thread_id = qemu_get_thread_id();
1426 cpu->created = true;
1427 cpu->can_do_io = 1;
1428 current_cpu = cpu;
1429 qemu_cond_signal(&qemu_cpu_cond);
1430
1431 /* process any pending work */
1432 cpu->exit_request = 1;
1433
1434 while (1) {
1435 if (cpu_can_run(cpu)) {
1436 int r;
1437 r = tcg_cpu_exec(cpu);
1438 switch (r) {
1439 case EXCP_DEBUG:
1440 cpu_handle_guest_debug(cpu);
1441 break;
1442 case EXCP_HALTED:
1443 /* during start-up the vCPU is reset and the thread is
1444 * kicked several times. If we don't ensure we go back
1445 * to sleep in the halted state we won't cleanly
1446 * start-up when the vCPU is enabled.
1447 *
1448 * cpu->halted should ensure we sleep in wait_io_event
1449 */
1450 g_assert(cpu->halted);
1451 break;
08e73c48
PK
1452 case EXCP_ATOMIC:
1453 qemu_mutex_unlock_iothread();
1454 cpu_exec_step_atomic(cpu);
1455 qemu_mutex_lock_iothread();
37257942
AB
1456 default:
1457 /* Ignore everything else? */
1458 break;
1459 }
1460 }
1461
37257942
AB
1462 atomic_mb_set(&cpu->exit_request, 0);
1463 qemu_tcg_wait_io_event(cpu);
1464 }
1465
1466 return NULL;
1467}
1468
2ff09a40 1469static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1470{
1471#ifndef _WIN32
1472 int err;
1473
e0c38211
PB
1474 if (cpu->thread_kicked) {
1475 return;
9102deda 1476 }
e0c38211 1477 cpu->thread_kicked = true;
814e612e 1478 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1479 if (err) {
1480 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1481 exit(1);
1482 }
1483#else /* _WIN32 */
b0cb0a66
VP
1484 if (!qemu_cpu_is_self(cpu)) {
1485 if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1486 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1487 __func__, GetLastError());
1488 exit(1);
1489 }
1490 }
e0c38211
PB
1491#endif
1492}
ed9164a3 1493
c08d7424 1494void qemu_cpu_kick(CPUState *cpu)
296af7c9 1495{
f5c121b8 1496 qemu_cond_broadcast(cpu->halt_cond);
e0c38211 1497 if (tcg_enabled()) {
791158d9 1498 cpu_exit(cpu);
37257942 1499 /* NOP unless doing single-thread RR */
791158d9 1500 qemu_cpu_kick_rr_cpu();
e0c38211 1501 } else {
b0cb0a66
VP
1502 if (hax_enabled()) {
1503 /*
1504 * FIXME: race condition with the exit_request check in
1505 * hax_vcpu_hax_exec
1506 */
1507 cpu->exit_request = 1;
1508 }
e0c38211
PB
1509 qemu_cpu_kick_thread(cpu);
1510 }
296af7c9
BS
1511}
1512
46d62fac 1513void qemu_cpu_kick_self(void)
296af7c9 1514{
4917cf44 1515 assert(current_cpu);
9102deda 1516 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1517}
1518
60e82579 1519bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1520{
814e612e 1521 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1522}
1523
79e2b9ae 1524bool qemu_in_vcpu_thread(void)
aa723c23 1525{
4917cf44 1526 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1527}
1528
afbe7053
PB
/* Per-thread flag maintained by qemu_mutex_lock_iothread() and
 * qemu_mutex_unlock_iothread(). */
static __thread bool iothread_locked = false;

/* Report whether the calling thread has taken the iothread lock through
 * qemu_mutex_lock_iothread(). */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1535
296af7c9
BS
1536void qemu_mutex_lock_iothread(void)
1537{
8d04fb55
JK
1538 g_assert(!qemu_mutex_iothread_locked());
1539 qemu_mutex_lock(&qemu_global_mutex);
afbe7053 1540 iothread_locked = true;
296af7c9
BS
1541}
1542
1543void qemu_mutex_unlock_iothread(void)
1544{
8d04fb55 1545 g_assert(qemu_mutex_iothread_locked());
afbe7053 1546 iothread_locked = false;
296af7c9
BS
1547 qemu_mutex_unlock(&qemu_global_mutex);
1548}
1549
e8faee06 1550static bool all_vcpus_paused(void)
296af7c9 1551{
bdc44640 1552 CPUState *cpu;
296af7c9 1553
bdc44640 1554 CPU_FOREACH(cpu) {
182735ef 1555 if (!cpu->stopped) {
e8faee06 1556 return false;
0ab07c62 1557 }
296af7c9
BS
1558 }
1559
e8faee06 1560 return true;
296af7c9
BS
1561}
1562
1563void pause_all_vcpus(void)
1564{
bdc44640 1565 CPUState *cpu;
296af7c9 1566
40daca54 1567 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1568 CPU_FOREACH(cpu) {
182735ef
AF
1569 cpu->stop = true;
1570 qemu_cpu_kick(cpu);
296af7c9
BS
1571 }
1572
aa723c23 1573 if (qemu_in_vcpu_thread()) {
d798e974 1574 cpu_stop_current();
d798e974
JK
1575 }
1576
296af7c9 1577 while (!all_vcpus_paused()) {
be7d6c57 1578 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1579 CPU_FOREACH(cpu) {
182735ef 1580 qemu_cpu_kick(cpu);
296af7c9
BS
1581 }
1582 }
1583}
1584
2993683b
IM
1585void cpu_resume(CPUState *cpu)
1586{
1587 cpu->stop = false;
1588 cpu->stopped = false;
1589 qemu_cpu_kick(cpu);
1590}
1591
296af7c9
BS
1592void resume_all_vcpus(void)
1593{
bdc44640 1594 CPUState *cpu;
296af7c9 1595
40daca54 1596 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1597 CPU_FOREACH(cpu) {
182735ef 1598 cpu_resume(cpu);
296af7c9
BS
1599 }
1600}
1601
4c055ab5
GZ
1602void cpu_remove(CPUState *cpu)
1603{
1604 cpu->stop = true;
1605 cpu->unplug = true;
1606 qemu_cpu_kick(cpu);
1607}
1608
2c579042
BR
1609void cpu_remove_sync(CPUState *cpu)
1610{
1611 cpu_remove(cpu);
1612 while (cpu->created) {
1613 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1614 }
1615}
1616
4900116e
DDAG
1617/* For temporary buffers for forming a name */
1618#define VCPU_THREAD_NAME_SIZE 16
1619
e5ab30a2 1620static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1621{
4900116e 1622 char thread_name[VCPU_THREAD_NAME_SIZE];
37257942
AB
1623 static QemuCond *single_tcg_halt_cond;
1624 static QemuThread *single_tcg_cpu_thread;
4900116e 1625
37257942 1626 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
814e612e 1627 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1628 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1629 qemu_cond_init(cpu->halt_cond);
37257942
AB
1630
1631 if (qemu_tcg_mttcg_enabled()) {
1632 /* create a thread per vCPU with TCG (MTTCG) */
1633 parallel_cpus = true;
1634 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
4900116e 1635 cpu->cpu_index);
37257942
AB
1636
1637 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1638 cpu, QEMU_THREAD_JOINABLE);
1639
1640 } else {
1641 /* share a single thread for all cpus with TCG */
1642 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1643 qemu_thread_create(cpu->thread, thread_name,
1644 qemu_tcg_rr_cpu_thread_fn,
1645 cpu, QEMU_THREAD_JOINABLE);
1646
1647 single_tcg_halt_cond = cpu->halt_cond;
1648 single_tcg_cpu_thread = cpu->thread;
1649 }
1ecf47bf 1650#ifdef _WIN32
814e612e 1651 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1652#endif
61a46217 1653 while (!cpu->created) {
18a85728 1654 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1655 }
296af7c9 1656 } else {
37257942
AB
1657 /* For non-MTTCG cases we share the thread */
1658 cpu->thread = single_tcg_cpu_thread;
1659 cpu->halt_cond = single_tcg_halt_cond;
296af7c9
BS
1660 }
1661}
1662
b0cb0a66
VP
1663static void qemu_hax_start_vcpu(CPUState *cpu)
1664{
1665 char thread_name[VCPU_THREAD_NAME_SIZE];
1666
1667 cpu->thread = g_malloc0(sizeof(QemuThread));
1668 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1669 qemu_cond_init(cpu->halt_cond);
1670
1671 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1672 cpu->cpu_index);
1673 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1674 cpu, QEMU_THREAD_JOINABLE);
1675#ifdef _WIN32
1676 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1677#endif
1678 while (!cpu->created) {
1679 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1680 }
1681}
1682
48a106bd 1683static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1684{
4900116e
DDAG
1685 char thread_name[VCPU_THREAD_NAME_SIZE];
1686
814e612e 1687 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1688 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1689 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1690 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1691 cpu->cpu_index);
1692 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1693 cpu, QEMU_THREAD_JOINABLE);
61a46217 1694 while (!cpu->created) {
18a85728 1695 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1696 }
296af7c9
BS
1697}
1698
10a9021d 1699static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1700{
4900116e
DDAG
1701 char thread_name[VCPU_THREAD_NAME_SIZE];
1702
814e612e 1703 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1704 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1705 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1706 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1707 cpu->cpu_index);
1708 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1709 QEMU_THREAD_JOINABLE);
61a46217 1710 while (!cpu->created) {
c7f0f3b1
AL
1711 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1712 }
1713}
1714
c643bed9 1715void qemu_init_vcpu(CPUState *cpu)
296af7c9 1716{
ce3960eb
AF
1717 cpu->nr_cores = smp_cores;
1718 cpu->nr_threads = smp_threads;
f324e766 1719 cpu->stopped = true;
56943e8c
PM
1720
1721 if (!cpu->as) {
1722 /* If the target cpu hasn't set up any address spaces itself,
1723 * give it the default one.
1724 */
6731d864
PC
1725 AddressSpace *as = address_space_init_shareable(cpu->memory,
1726 "cpu-memory");
12ebc9a7 1727 cpu->num_ases = 1;
6731d864 1728 cpu_address_space_init(cpu, as, 0);
56943e8c
PM
1729 }
1730
0ab07c62 1731 if (kvm_enabled()) {
48a106bd 1732 qemu_kvm_start_vcpu(cpu);
b0cb0a66
VP
1733 } else if (hax_enabled()) {
1734 qemu_hax_start_vcpu(cpu);
c7f0f3b1 1735 } else if (tcg_enabled()) {
e5ab30a2 1736 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1737 } else {
10a9021d 1738 qemu_dummy_start_vcpu(cpu);
0ab07c62 1739 }
296af7c9
BS
1740}
1741
b4a3d965 1742void cpu_stop_current(void)
296af7c9 1743{
4917cf44
AF
1744 if (current_cpu) {
1745 current_cpu->stop = false;
1746 current_cpu->stopped = true;
1747 cpu_exit(current_cpu);
96bce683 1748 qemu_cond_broadcast(&qemu_pause_cond);
b4a3d965 1749 }
296af7c9
BS
1750}
1751
56983463 1752int vm_stop(RunState state)
296af7c9 1753{
aa723c23 1754 if (qemu_in_vcpu_thread()) {
74892d24 1755 qemu_system_vmstop_request_prepare();
1dfb4dd9 1756 qemu_system_vmstop_request(state);
296af7c9
BS
1757 /*
1758 * FIXME: should not return to device code in case
1759 * vm_stop() has been requested.
1760 */
b4a3d965 1761 cpu_stop_current();
56983463 1762 return 0;
296af7c9 1763 }
56983463
KW
1764
1765 return do_vm_stop(state);
296af7c9
BS
1766}
1767
2d76e823
CI
1768/**
1769 * Prepare for (re)starting the VM.
1770 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1771 * running or in case of an error condition), 0 otherwise.
1772 */
1773int vm_prepare_start(void)
1774{
1775 RunState requested;
1776 int res = 0;
1777
1778 qemu_vmstop_requested(&requested);
1779 if (runstate_is_running() && requested == RUN_STATE__MAX) {
1780 return -1;
1781 }
1782
1783 /* Ensure that a STOP/RESUME pair of events is emitted if a
1784 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1785 * example, according to documentation is always followed by
1786 * the STOP event.
1787 */
1788 if (runstate_is_running()) {
1789 qapi_event_send_stop(&error_abort);
1790 res = -1;
1791 } else {
1792 replay_enable_events();
1793 cpu_enable_ticks();
1794 runstate_set(RUN_STATE_RUNNING);
1795 vm_state_notify(1, RUN_STATE_RUNNING);
1796 }
1797
1798 /* We are sending this now, but the CPUs will be resumed shortly later */
1799 qapi_event_send_resume(&error_abort);
1800 return res;
1801}
1802
/* Start the VM: resume all vCPUs when vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (vm_prepare_start() == 0) {
        resume_all_vcpus();
    }
}
1809
8a9236f1
LC
1810/* does a state transition even if the VM is already stopped,
1811 current state is forgotten forever */
56983463 1812int vm_stop_force_state(RunState state)
8a9236f1
LC
1813{
1814 if (runstate_is_running()) {
56983463 1815 return vm_stop(state);
8a9236f1
LC
1816 } else {
1817 runstate_set(state);
b2780d32
WC
1818
1819 bdrv_drain_all();
594a45ce
KW
1820 /* Make sure to return an error if the flush in a previous vm_stop()
1821 * failed. */
22af08ea 1822 return bdrv_flush_all();
8a9236f1
LC
1823 }
1824}
1825
9a78eead 1826void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1827{
1828 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1829#if defined(cpu_list)
1830 cpu_list(f, cpu_fprintf);
262353cb
BS
1831#endif
1832}
de0b36b6
LC
1833
1834CpuInfoList *qmp_query_cpus(Error **errp)
1835{
1836 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1837 CPUState *cpu;
de0b36b6 1838
bdc44640 1839 CPU_FOREACH(cpu) {
de0b36b6 1840 CpuInfoList *info;
182735ef
AF
1841#if defined(TARGET_I386)
1842 X86CPU *x86_cpu = X86_CPU(cpu);
1843 CPUX86State *env = &x86_cpu->env;
1844#elif defined(TARGET_PPC)
1845 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1846 CPUPPCState *env = &ppc_cpu->env;
1847#elif defined(TARGET_SPARC)
1848 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1849 CPUSPARCState *env = &sparc_cpu->env;
1850#elif defined(TARGET_MIPS)
1851 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1852 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1853#elif defined(TARGET_TRICORE)
1854 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1855 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1856#endif
de0b36b6 1857
cb446eca 1858 cpu_synchronize_state(cpu);
de0b36b6
LC
1859
1860 info = g_malloc0(sizeof(*info));
1861 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1862 info->value->CPU = cpu->cpu_index;
182735ef 1863 info->value->current = (cpu == first_cpu);
259186a7 1864 info->value->halted = cpu->halted;
58f88d4b 1865 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
9f09e18a 1866 info->value->thread_id = cpu->thread_id;
de0b36b6 1867#if defined(TARGET_I386)
86f4b687 1868 info->value->arch = CPU_INFO_ARCH_X86;
544a3731 1869 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
de0b36b6 1870#elif defined(TARGET_PPC)
86f4b687 1871 info->value->arch = CPU_INFO_ARCH_PPC;
544a3731 1872 info->value->u.ppc.nip = env->nip;
de0b36b6 1873#elif defined(TARGET_SPARC)
86f4b687 1874 info->value->arch = CPU_INFO_ARCH_SPARC;
544a3731
EB
1875 info->value->u.q_sparc.pc = env->pc;
1876 info->value->u.q_sparc.npc = env->npc;
de0b36b6 1877#elif defined(TARGET_MIPS)
86f4b687 1878 info->value->arch = CPU_INFO_ARCH_MIPS;
544a3731 1879 info->value->u.q_mips.PC = env->active_tc.PC;
48e06fe0 1880#elif defined(TARGET_TRICORE)
86f4b687 1881 info->value->arch = CPU_INFO_ARCH_TRICORE;
544a3731 1882 info->value->u.tricore.PC = env->PC;
86f4b687
EB
1883#else
1884 info->value->arch = CPU_INFO_ARCH_OTHER;
de0b36b6
LC
1885#endif
1886
1887 /* XXX: waiting for the qapi to support GSList */
1888 if (!cur_item) {
1889 head = cur_item = info;
1890 } else {
1891 cur_item->next = info;
1892 cur_item = info;
1893 }
1894 }
1895
1896 return head;
1897}
0cfd6a9a
LC
1898
1899void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1900 bool has_cpu, int64_t cpu_index, Error **errp)
1901{
1902 FILE *f;
1903 uint32_t l;
55e5c285 1904 CPUState *cpu;
0cfd6a9a 1905 uint8_t buf[1024];
0dc9daf0 1906 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1907
1908 if (!has_cpu) {
1909 cpu_index = 0;
1910 }
1911
151d1322
AF
1912 cpu = qemu_get_cpu(cpu_index);
1913 if (cpu == NULL) {
c6bd8c70
MA
1914 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1915 "a CPU number");
0cfd6a9a
LC
1916 return;
1917 }
1918
1919 f = fopen(filename, "wb");
1920 if (!f) {
618da851 1921 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1922 return;
1923 }
1924
1925 while (size != 0) {
1926 l = sizeof(buf);
1927 if (l > size)
1928 l = size;
2f4d0f59 1929 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1930 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1931 " specified", orig_addr, orig_size);
2f4d0f59
AK
1932 goto exit;
1933 }
0cfd6a9a 1934 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1935 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1936 goto exit;
1937 }
1938 addr += l;
1939 size -= l;
1940 }
1941
1942exit:
1943 fclose(f);
1944}
6d3962bf
LC
1945
1946void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1947 Error **errp)
1948{
1949 FILE *f;
1950 uint32_t l;
1951 uint8_t buf[1024];
1952
1953 f = fopen(filename, "wb");
1954 if (!f) {
618da851 1955 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1956 return;
1957 }
1958
1959 while (size != 0) {
1960 l = sizeof(buf);
1961 if (l > size)
1962 l = size;
eb6282f2 1963 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1964 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1965 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1966 goto exit;
1967 }
1968 addr += l;
1969 size -= l;
1970 }
1971
1972exit:
1973 fclose(f);
1974}
ab49ab5c
LC
1975
1976void qmp_inject_nmi(Error **errp)
1977{
9cb805fd 1978 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c 1979}
27498bef
ST
1980
1981void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1982{
1983 if (!use_icount) {
1984 return;
1985 }
1986
1987 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1988 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1989 if (icount_align_option) {
1990 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1991 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1992 } else {
1993 cpu_fprintf(f, "Max guest delay NA\n");
1994 cpu_fprintf(f, "Max guest advance NA\n");
1995 }
1996}