]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
replay: vmstate for replay module
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
7b31bbc2 26#include "qemu/osdep.h"
33c11879
PB
27#include "qemu-common.h"
28#include "cpu.h"
83c9089e 29#include "monitor/monitor.h"
a4e15de9 30#include "qapi/qmp/qerror.h"
d49b6836 31#include "qemu/error-report.h"
9c17d615 32#include "sysemu/sysemu.h"
da31d594 33#include "sysemu/block-backend.h"
022c62cb 34#include "exec/gdbstub.h"
9c17d615
PB
35#include "sysemu/dma.h"
36#include "sysemu/kvm.h"
de0b36b6 37#include "qmp-commands.h"
63c91552 38#include "exec/exec-all.h"
296af7c9 39
1de7afc9 40#include "qemu/thread.h"
9c17d615
PB
41#include "sysemu/cpus.h"
42#include "sysemu/qtest.h"
1de7afc9
PB
43#include "qemu/main-loop.h"
44#include "qemu/bitmap.h"
cb365646 45#include "qemu/seqlock.h"
a4e15de9 46#include "qapi-event.h"
9cb805fd 47#include "hw/nmi.h"
8b427044 48#include "sysemu/replay.h"
0ff0fc19
JK
49
50#ifndef _WIN32
1de7afc9 51#include "qemu/compatfd.h"
0ff0fc19 52#endif
296af7c9 53
6d9cb73c
JK
54#ifdef CONFIG_LINUX
55
56#include <sys/prctl.h>
57
c0532a76
MT
58#ifndef PR_MCE_KILL
59#define PR_MCE_KILL 33
60#endif
61
6d9cb73c
JK
62#ifndef PR_MCE_KILL_SET
63#define PR_MCE_KILL_SET 1
64#endif
65
66#ifndef PR_MCE_KILL_EARLY
67#define PR_MCE_KILL_EARLY 1
68#endif
69
70#endif /* CONFIG_LINUX */
71
182735ef 72static CPUState *next_cpu;
27498bef
ST
73int64_t max_delay;
74int64_t max_advance;
296af7c9 75
2adcc85d
JH
76/* vcpu throttling controls */
77static QEMUTimer *throttle_timer;
78static unsigned int throttle_percentage;
79
80#define CPU_THROTTLE_PCT_MIN 1
81#define CPU_THROTTLE_PCT_MAX 99
82#define CPU_THROTTLE_TIMESLICE_NS 10000000
83
321bc0b2
TC
84bool cpu_is_stopped(CPUState *cpu)
85{
86 return cpu->stopped || !runstate_is_running();
87}
88
a98ae1d8 89static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 90{
c64ca814 91 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
92 return false;
93 }
321bc0b2 94 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
95 return true;
96 }
8c2e1b00 97 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 98 kvm_halt_in_kernel()) {
ac873f1e
PM
99 return false;
100 }
101 return true;
102}
103
104static bool all_cpu_threads_idle(void)
105{
182735ef 106 CPUState *cpu;
ac873f1e 107
bdc44640 108 CPU_FOREACH(cpu) {
182735ef 109 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
110 return false;
111 }
112 }
113 return true;
114}
115
946fb27c
PB
116/***********************************************************/
117/* guest cycle counter */
118
a3270e19
PB
119/* Protected by TimersState seqlock */
120
5045e9d9 121static bool icount_sleep = true;
71468395 122static int64_t vm_clock_warp_start = -1;
946fb27c
PB
123/* Conversion factor from emulated instructions to virtual clock ticks. */
124static int icount_time_shift;
125/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
126#define MAX_ICOUNT_SHIFT 10
a3270e19 127
946fb27c
PB
128static QEMUTimer *icount_rt_timer;
129static QEMUTimer *icount_vm_timer;
130static QEMUTimer *icount_warp_timer;
946fb27c
PB
131
/* Bookkeeping for the guest tick counter, the VM clock and icount. */
typedef struct TimersState {
    /* Protected by BQL. */
    /* Last value handed out by cpu_get_ticks(); used to keep it monotonic. */
    int64_t cpu_ticks_prev;
    /* Offset added to the host tick counter by cpu_get_ticks(). */
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    /* Non-zero while the tick counter and VM clock are running. */
    int32_t cpu_ticks_enabled;
    /* Not used by the code in this file other than as a vmstate field. */
    int64_t dummy;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;
150
d9cd4007 151static TimersState timers_state;
946fb27c 152
2a62914b 153int64_t cpu_get_icount_raw(void)
946fb27c
PB
154{
155 int64_t icount;
4917cf44 156 CPUState *cpu = current_cpu;
946fb27c 157
c96778bb 158 icount = timers_state.qemu_icount;
4917cf44 159 if (cpu) {
414b15c9 160 if (!cpu->can_do_io) {
2a62914b
PD
161 fprintf(stderr, "Bad icount read\n");
162 exit(1);
946fb27c 163 }
28ecfd7a 164 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 165 }
2a62914b
PD
166 return icount;
167}
168
169/* Return the virtual CPU time, based on the instruction counter. */
170static int64_t cpu_get_icount_locked(void)
171{
172 int64_t icount = cpu_get_icount_raw();
3f031313 173 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
174}
175
17a15f1b
PB
176int64_t cpu_get_icount(void)
177{
178 int64_t icount;
179 unsigned start;
180
181 do {
182 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
183 icount = cpu_get_icount_locked();
184 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
185
186 return icount;
187}
188
3f031313
FK
189int64_t cpu_icount_to_ns(int64_t icount)
190{
191 return icount << icount_time_shift;
192}
193
d90f3cca
C
194/* return the time elapsed in VM between vm_start and vm_stop. Unless
195 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
196 * counter.
197 *
198 * Caller must hold the BQL
199 */
946fb27c
PB
200int64_t cpu_get_ticks(void)
201{
5f3e3101
PB
202 int64_t ticks;
203
946fb27c
PB
204 if (use_icount) {
205 return cpu_get_icount();
206 }
5f3e3101
PB
207
208 ticks = timers_state.cpu_ticks_offset;
209 if (timers_state.cpu_ticks_enabled) {
4a7428c5 210 ticks += cpu_get_host_ticks();
5f3e3101
PB
211 }
212
213 if (timers_state.cpu_ticks_prev > ticks) {
214 /* Note: non increasing ticks may happen if the host uses
215 software suspend */
216 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
217 ticks = timers_state.cpu_ticks_prev;
946fb27c 218 }
5f3e3101
PB
219
220 timers_state.cpu_ticks_prev = ticks;
221 return ticks;
946fb27c
PB
222}
223
cb365646 224static int64_t cpu_get_clock_locked(void)
946fb27c 225{
1d45cea5 226 int64_t time;
cb365646 227
1d45cea5 228 time = timers_state.cpu_clock_offset;
5f3e3101 229 if (timers_state.cpu_ticks_enabled) {
1d45cea5 230 time += get_clock();
946fb27c 231 }
cb365646 232
1d45cea5 233 return time;
cb365646
LPF
234}
235
d90f3cca 236/* Return the monotonic time elapsed in VM, i.e.,
8212ff86
PM
237 * the time between vm_start and vm_stop
238 */
cb365646
LPF
239int64_t cpu_get_clock(void)
240{
241 int64_t ti;
242 unsigned start;
243
244 do {
245 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
246 ti = cpu_get_clock_locked();
247 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
248
249 return ti;
946fb27c
PB
250}
251
cb365646 252/* enable cpu_get_ticks()
3224e878 253 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 254 */
946fb27c
PB
255void cpu_enable_ticks(void)
256{
cb365646 257 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 258 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 259 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 260 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
261 timers_state.cpu_clock_offset -= get_clock();
262 timers_state.cpu_ticks_enabled = 1;
263 }
03719e44 264 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
265}
266
267/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646 268 * cpu_get_ticks() after that.
3224e878 269 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 270 */
946fb27c
PB
271void cpu_disable_ticks(void)
272{
cb365646 273 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
03719e44 274 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 275 if (timers_state.cpu_ticks_enabled) {
4a7428c5 276 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 277 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
278 timers_state.cpu_ticks_enabled = 0;
279 }
03719e44 280 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
281}
282
283/* Correlation between real and virtual time is always going to be
284 fairly approximate, so ignore small variation.
285 When the guest is idle real and virtual time will be aligned in
286 the IO wait loop. */
73bcb24d 287#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
288
289static void icount_adjust(void)
290{
291 int64_t cur_time;
292 int64_t cur_icount;
293 int64_t delta;
a3270e19
PB
294
295 /* Protected by TimersState mutex. */
946fb27c 296 static int64_t last_delta;
468cc7cf 297
946fb27c
PB
298 /* If the VM is not running, then do nothing. */
299 if (!runstate_is_running()) {
300 return;
301 }
468cc7cf 302
03719e44 303 seqlock_write_begin(&timers_state.vm_clock_seqlock);
17a15f1b
PB
304 cur_time = cpu_get_clock_locked();
305 cur_icount = cpu_get_icount_locked();
468cc7cf 306
946fb27c
PB
307 delta = cur_icount - cur_time;
308 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
309 if (delta > 0
310 && last_delta + ICOUNT_WOBBLE < delta * 2
311 && icount_time_shift > 0) {
312 /* The guest is getting too far ahead. Slow time down. */
313 icount_time_shift--;
314 }
315 if (delta < 0
316 && last_delta - ICOUNT_WOBBLE > delta * 2
317 && icount_time_shift < MAX_ICOUNT_SHIFT) {
318 /* The guest is getting too far behind. Speed time up. */
319 icount_time_shift++;
320 }
321 last_delta = delta;
c96778bb
FK
322 timers_state.qemu_icount_bias = cur_icount
323 - (timers_state.qemu_icount << icount_time_shift);
03719e44 324 seqlock_write_end(&timers_state.vm_clock_seqlock);
946fb27c
PB
325}
326
327static void icount_adjust_rt(void *opaque)
328{
40daca54 329 timer_mod(icount_rt_timer,
1979b908 330 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
331 icount_adjust();
332}
333
334static void icount_adjust_vm(void *opaque)
335{
40daca54
AB
336 timer_mod(icount_vm_timer,
337 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 338 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
339 icount_adjust();
340}
341
342static int64_t qemu_icount_round(int64_t count)
343{
344 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
345}
346
efab87cf 347static void icount_warp_rt(void)
946fb27c 348{
ccffff48
AB
349 unsigned seq;
350 int64_t warp_start;
351
17a15f1b
PB
352 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
353 * changes from -1 to another value, so the race here is okay.
354 */
ccffff48
AB
355 do {
356 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
357 warp_start = vm_clock_warp_start;
358 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
359
360 if (warp_start == -1) {
946fb27c
PB
361 return;
362 }
363
03719e44 364 seqlock_write_begin(&timers_state.vm_clock_seqlock);
946fb27c 365 if (runstate_is_running()) {
8eda206e
PD
366 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
367 cpu_get_clock_locked());
8ed961d9
PB
368 int64_t warp_delta;
369
370 warp_delta = clock - vm_clock_warp_start;
371 if (use_icount == 2) {
946fb27c 372 /*
40daca54 373 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
374 * far ahead of real time.
375 */
17a15f1b 376 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 377 int64_t delta = clock - cur_icount;
8ed961d9 378 warp_delta = MIN(warp_delta, delta);
946fb27c 379 }
c96778bb 380 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
381 }
382 vm_clock_warp_start = -1;
03719e44 383 seqlock_write_end(&timers_state.vm_clock_seqlock);
8ed961d9
PB
384
385 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
386 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
387 }
946fb27c
PB
388}
389
/* Callback of icount_warp_timer; @opaque is unused. */
static void icount_timer_cb(void *opaque)
{
    /* A replay checkpoint is not needed here: the timer already
     * synchronizes with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
397
8156be56
PB
398void qtest_clock_warp(int64_t dest)
399{
40daca54 400 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 401 AioContext *aio_context;
8156be56 402 assert(qtest_enabled());
efef88b3 403 aio_context = qemu_get_aio_context();
8156be56 404 while (clock < dest) {
40daca54 405 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 406 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 407
03719e44 408 seqlock_write_begin(&timers_state.vm_clock_seqlock);
c96778bb 409 timers_state.qemu_icount_bias += warp;
03719e44 410 seqlock_write_end(&timers_state.vm_clock_seqlock);
17a15f1b 411
40daca54 412 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 413 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 414 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 415 }
40daca54 416 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
417}
418
e76d1798 419void qemu_start_warp_timer(void)
946fb27c 420{
ce78d18c 421 int64_t clock;
946fb27c
PB
422 int64_t deadline;
423
e76d1798 424 if (!use_icount) {
946fb27c
PB
425 return;
426 }
427
8bd7f71d
PD
428 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
429 * do not fire, so computing the deadline does not make sense.
430 */
431 if (!runstate_is_running()) {
432 return;
433 }
434
435 /* warp clock deterministically in record/replay mode */
e76d1798 436 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
8bd7f71d
PD
437 return;
438 }
439
ce78d18c 440 if (!all_cpu_threads_idle()) {
946fb27c
PB
441 return;
442 }
443
8156be56
PB
444 if (qtest_enabled()) {
445 /* When testing, qtest commands advance icount. */
e76d1798 446 return;
8156be56
PB
447 }
448
ac70aafc 449 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 450 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 451 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 452 if (deadline < 0) {
d7a0f71d
VC
453 static bool notified;
454 if (!icount_sleep && !notified) {
455 error_report("WARNING: icount sleep disabled and no active timers");
456 notified = true;
457 }
ce78d18c 458 return;
ac70aafc
AB
459 }
460
946fb27c
PB
461 if (deadline > 0) {
462 /*
40daca54 463 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
464 * sleep. Otherwise, the CPU might be waiting for a future timer
465 * interrupt to wake it up, but the interrupt never comes because
466 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 467 * QEMU_CLOCK_VIRTUAL.
946fb27c 468 */
5045e9d9
VC
469 if (!icount_sleep) {
470 /*
471 * We never let VCPUs sleep in no sleep icount mode.
472 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
473 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
474 * It is useful when we want a deterministic execution time,
475 * isolated from host latencies.
476 */
03719e44 477 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9 478 timers_state.qemu_icount_bias += deadline;
03719e44 479 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9
VC
480 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
481 } else {
482 /*
483 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
484 * "real" time, (related to the time left until the next event) has
485 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
486 * This avoids that the warps are visible externally; for example,
487 * you will not be sending network packets continuously instead of
488 * every 100ms.
489 */
03719e44 490 seqlock_write_begin(&timers_state.vm_clock_seqlock);
5045e9d9
VC
491 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
492 vm_clock_warp_start = clock;
493 }
03719e44 494 seqlock_write_end(&timers_state.vm_clock_seqlock);
5045e9d9 495 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 496 }
ac70aafc 497 } else if (deadline == 0) {
40daca54 498 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
499 }
500}
501
e76d1798
PD
502static void qemu_account_warp_timer(void)
503{
504 if (!use_icount || !icount_sleep) {
505 return;
506 }
507
508 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
509 * do not fire, so computing the deadline does not make sense.
510 */
511 if (!runstate_is_running()) {
512 return;
513 }
514
515 /* warp clock deterministically in record/replay mode */
516 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
517 return;
518 }
519
520 timer_del(icount_warp_timer);
521 icount_warp_rt();
522}
523
d09eae37
FK
524static bool icount_state_needed(void *opaque)
525{
526 return use_icount;
527}
528
/*
 * This is a subsection for icount migration.
 * It carries the icount bias and the raw instruction counter and is
 * included only when icount_state_needed() returns true.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};
543
946fb27c
PB
/* Migration description of TimersState; registered by cpu_ticks_init(). */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        /* cpu_clock_offset is present from stream version 2 onwards. */
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};
559
e0eeb4a2 560static void cpu_throttle_thread(CPUState *cpu, void *opaque)
2adcc85d 561{
2adcc85d
JH
562 double pct;
563 double throttle_ratio;
564 long sleeptime_ns;
565
566 if (!cpu_throttle_get_percentage()) {
567 return;
568 }
569
570 pct = (double)cpu_throttle_get_percentage()/100;
571 throttle_ratio = pct / (1 - pct);
572 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
573
574 qemu_mutex_unlock_iothread();
575 atomic_set(&cpu->throttle_thread_scheduled, 0);
576 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
577 qemu_mutex_lock_iothread();
578}
579
580static void cpu_throttle_timer_tick(void *opaque)
581{
582 CPUState *cpu;
583 double pct;
584
585 /* Stop the timer if needed */
586 if (!cpu_throttle_get_percentage()) {
587 return;
588 }
589 CPU_FOREACH(cpu) {
590 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
e0eeb4a2 591 async_run_on_cpu(cpu, cpu_throttle_thread, NULL);
2adcc85d
JH
592 }
593 }
594
595 pct = (double)cpu_throttle_get_percentage()/100;
596 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
597 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
598}
599
600void cpu_throttle_set(int new_throttle_pct)
601{
602 /* Ensure throttle percentage is within valid range */
603 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
604 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
605
606 atomic_set(&throttle_percentage, new_throttle_pct);
607
608 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
609 CPU_THROTTLE_TIMESLICE_NS);
610}
611
612void cpu_throttle_stop(void)
613{
614 atomic_set(&throttle_percentage, 0);
615}
616
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
621
622int cpu_throttle_get_percentage(void)
623{
624 return atomic_read(&throttle_percentage);
625}
626
4603ea01
PD
627void cpu_ticks_init(void)
628{
ccdb3c1f 629 seqlock_init(&timers_state.vm_clock_seqlock);
4603ea01 630 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
631 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
632 cpu_throttle_timer_tick, NULL);
4603ea01
PD
633}
634
1ad9580b 635void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 636{
1ad9580b 637 const char *option;
a8bfac37 638 char *rem_str = NULL;
1ad9580b 639
1ad9580b 640 option = qemu_opt_get(opts, "shift");
946fb27c 641 if (!option) {
a8bfac37
ST
642 if (qemu_opt_get(opts, "align") != NULL) {
643 error_setg(errp, "Please specify shift option when using align");
644 }
946fb27c
PB
645 return;
646 }
f1f4b57e
VC
647
648 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
649 if (icount_sleep) {
650 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
e76d1798 651 icount_timer_cb, NULL);
5045e9d9 652 }
f1f4b57e 653
a8bfac37 654 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
655
656 if (icount_align_option && !icount_sleep) {
778d9f9b 657 error_setg(errp, "align=on and sleep=off are incompatible");
f1f4b57e 658 }
946fb27c 659 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
660 errno = 0;
661 icount_time_shift = strtol(option, &rem_str, 0);
662 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
663 error_setg(errp, "icount: Invalid shift value");
664 }
946fb27c
PB
665 use_icount = 1;
666 return;
a8bfac37
ST
667 } else if (icount_align_option) {
668 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e 669 } else if (!icount_sleep) {
778d9f9b 670 error_setg(errp, "shift=auto and sleep=off are incompatible");
946fb27c
PB
671 }
672
673 use_icount = 2;
674
675 /* 125MIPS seems a reasonable initial guess at the guest speed.
676 It will be corrected fairly quickly anyway. */
677 icount_time_shift = 3;
678
679 /* Have both realtime and virtual time triggers for speed adjustment.
680 The realtime trigger catches emulated time passing too slowly,
681 the virtual time trigger catches emulated time passing too fast.
682 Realtime triggers occur even when idle, so use them less frequently
683 than VM triggers. */
bf2a7ddb
PD
684 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
685 icount_adjust_rt, NULL);
40daca54 686 timer_mod(icount_rt_timer,
bf2a7ddb 687 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
688 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
689 icount_adjust_vm, NULL);
690 timer_mod(icount_vm_timer,
691 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 692 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
693}
694
296af7c9
BS
695/***********************************************************/
696void hw_error(const char *fmt, ...)
697{
698 va_list ap;
55e5c285 699 CPUState *cpu;
296af7c9
BS
700
701 va_start(ap, fmt);
702 fprintf(stderr, "qemu: hardware error: ");
703 vfprintf(stderr, fmt, ap);
704 fprintf(stderr, "\n");
bdc44640 705 CPU_FOREACH(cpu) {
55e5c285 706 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 707 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
708 }
709 va_end(ap);
710 abort();
711}
712
713void cpu_synchronize_all_states(void)
714{
182735ef 715 CPUState *cpu;
296af7c9 716
bdc44640 717 CPU_FOREACH(cpu) {
182735ef 718 cpu_synchronize_state(cpu);
296af7c9
BS
719 }
720}
721
722void cpu_synchronize_all_post_reset(void)
723{
182735ef 724 CPUState *cpu;
296af7c9 725
bdc44640 726 CPU_FOREACH(cpu) {
182735ef 727 cpu_synchronize_post_reset(cpu);
296af7c9
BS
728 }
729}
730
731void cpu_synchronize_all_post_init(void)
732{
182735ef 733 CPUState *cpu;
296af7c9 734
bdc44640 735 CPU_FOREACH(cpu) {
182735ef 736 cpu_synchronize_post_init(cpu);
296af7c9
BS
737 }
738}
739
56983463 740static int do_vm_stop(RunState state)
296af7c9 741{
56983463
KW
742 int ret = 0;
743
1354869c 744 if (runstate_is_running()) {
296af7c9 745 cpu_disable_ticks();
296af7c9 746 pause_all_vcpus();
f5bbfba1 747 runstate_set(state);
1dfb4dd9 748 vm_state_notify(0, state);
a4e15de9 749 qapi_event_send_stop(&error_abort);
296af7c9 750 }
56983463 751
594a45ce 752 bdrv_drain_all();
da31d594 753 ret = blk_flush_all();
594a45ce 754
56983463 755 return ret;
296af7c9
BS
756}
757
a1fcaa73 758static bool cpu_can_run(CPUState *cpu)
296af7c9 759{
4fdeee7c 760 if (cpu->stop) {
a1fcaa73 761 return false;
0ab07c62 762 }
321bc0b2 763 if (cpu_is_stopped(cpu)) {
a1fcaa73 764 return false;
0ab07c62 765 }
a1fcaa73 766 return true;
296af7c9
BS
767}
768
91325046 769static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 770{
64f6b346 771 gdb_set_stop_cpu(cpu);
8cf71710 772 qemu_system_debug_request();
f324e766 773 cpu->stopped = true;
3c638d06
JK
774}
775
6d9cb73c
JK
776#ifdef CONFIG_LINUX
777static void sigbus_reraise(void)
778{
779 sigset_t set;
780 struct sigaction action;
781
782 memset(&action, 0, sizeof(action));
783 action.sa_handler = SIG_DFL;
784 if (!sigaction(SIGBUS, &action, NULL)) {
785 raise(SIGBUS);
786 sigemptyset(&set);
787 sigaddset(&set, SIGBUS);
a2d1761d 788 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
6d9cb73c
JK
789 }
790 perror("Failed to re-raise SIGBUS!\n");
791 abort();
792}
793
794static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
795 void *ctx)
796{
797 if (kvm_on_sigbus(siginfo->ssi_code,
798 (void *)(intptr_t)siginfo->ssi_addr)) {
799 sigbus_reraise();
800 }
801}
802
803static void qemu_init_sigbus(void)
804{
805 struct sigaction action;
806
807 memset(&action, 0, sizeof(action));
808 action.sa_flags = SA_SIGINFO;
809 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
810 sigaction(SIGBUS, &action, NULL);
811
812 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
813}
814
290adf38 815static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
816{
817 struct timespec ts = { 0, 0 };
818 siginfo_t siginfo;
819 sigset_t waitset;
820 sigset_t chkset;
821 int r;
822
823 sigemptyset(&waitset);
824 sigaddset(&waitset, SIG_IPI);
825 sigaddset(&waitset, SIGBUS);
826
827 do {
828 r = sigtimedwait(&waitset, &siginfo, &ts);
829 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
830 perror("sigtimedwait");
831 exit(1);
832 }
833
834 switch (r) {
835 case SIGBUS:
290adf38 836 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
837 sigbus_reraise();
838 }
839 break;
840 default:
841 break;
842 }
843
844 r = sigpending(&chkset);
845 if (r == -1) {
846 perror("sigpending");
847 exit(1);
848 }
849 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
850}
851
6d9cb73c
JK
852#else /* !CONFIG_LINUX */
853
/* Non-Linux build: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
}
1ab3c6c0 857
/* Non-Linux build: there are no signals to consume, so this is a no-op. */
static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
6d9cb73c
JK
861#endif /* !CONFIG_LINUX */
862
296af7c9 863#ifndef _WIN32
55f8d6ac
JK
/* Empty signal handler; installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals().
 */
static void dummy_signal(int sig)
{
}
55f8d6ac 867
13618e05 868static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
869{
870 int r;
871 sigset_t set;
872 struct sigaction sigact;
873
874 memset(&sigact, 0, sizeof(sigact));
875 sigact.sa_handler = dummy_signal;
876 sigaction(SIG_IPI, &sigact, NULL);
877
714bd040
PB
878 pthread_sigmask(SIG_BLOCK, NULL, &set);
879 sigdelset(&set, SIG_IPI);
714bd040 880 sigdelset(&set, SIGBUS);
491d6e80 881 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
882 if (r) {
883 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
884 exit(1);
885 }
886}
887
55f8d6ac 888#else /* _WIN32 */
/* Windows build: this variant only aborts, so it must never be reached. */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}
714bd040 893#endif /* _WIN32 */
ff48eb5f 894
b2532d88 895static QemuMutex qemu_global_mutex;
46daff13 896static QemuCond qemu_io_proceeded_cond;
6b49809c 897static unsigned iothread_requesting_mutex;
296af7c9
BS
898
899static QemuThread io_thread;
900
296af7c9
BS
901/* cpu creation */
902static QemuCond qemu_cpu_cond;
903/* system init */
296af7c9
BS
904static QemuCond qemu_pause_cond;
905
d3b12f5d 906void qemu_init_cpu_loop(void)
296af7c9 907{
6d9cb73c 908 qemu_init_sigbus();
ed94592b 909 qemu_cond_init(&qemu_cpu_cond);
ed94592b 910 qemu_cond_init(&qemu_pause_cond);
46daff13 911 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 912 qemu_mutex_init(&qemu_global_mutex);
296af7c9 913
b7680cb6 914 qemu_thread_get_self(&io_thread);
296af7c9
BS
915}
916
e0eeb4a2 917void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
e82bcec2 918{
d148d90e 919 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
3c02270d
CV
920}
921
4c055ab5
GZ
922static void qemu_kvm_destroy_vcpu(CPUState *cpu)
923{
924 if (kvm_destroy_vcpu(cpu) < 0) {
925 error_report("kvm_destroy_vcpu failed");
926 exit(EXIT_FAILURE);
927 }
928}
929
/* TCG counterpart of qemu_kvm_destroy_vcpu(); currently does nothing. */
static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}
933
509a0d78 934static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 935{
4fdeee7c
AF
936 if (cpu->stop) {
937 cpu->stop = false;
f324e766 938 cpu->stopped = true;
96bce683 939 qemu_cond_broadcast(&qemu_pause_cond);
296af7c9 940 }
a5403c69 941 process_queued_cpu_work(cpu);
216fc9a4 942 cpu->thread_kicked = false;
296af7c9
BS
943}
944
d5f8d613 945static void qemu_tcg_wait_io_event(CPUState *cpu)
296af7c9 946{
16400322 947 while (all_cpu_threads_idle()) {
d5f8d613 948 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 949 }
296af7c9 950
46daff13
PB
951 while (iothread_requesting_mutex) {
952 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
953 }
6cabe1f3 954
bdc44640 955 CPU_FOREACH(cpu) {
182735ef 956 qemu_wait_io_event_common(cpu);
6cabe1f3 957 }
296af7c9
BS
958}
959
fd529e8f 960static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 961{
a98ae1d8 962 while (cpu_thread_is_idle(cpu)) {
f5c121b8 963 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 964 }
296af7c9 965
290adf38 966 qemu_kvm_eat_signals(cpu);
509a0d78 967 qemu_wait_io_event_common(cpu);
296af7c9
BS
968}
969
7e97cd88 970static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 971{
48a106bd 972 CPUState *cpu = arg;
84b4915d 973 int r;
296af7c9 974
ab28bd23
PB
975 rcu_register_thread();
976
2e7f7a3c 977 qemu_mutex_lock_iothread();
814e612e 978 qemu_thread_get_self(cpu->thread);
9f09e18a 979 cpu->thread_id = qemu_get_thread_id();
626cf8f4 980 cpu->can_do_io = 1;
4917cf44 981 current_cpu = cpu;
296af7c9 982
504134d2 983 r = kvm_init_vcpu(cpu);
84b4915d
JK
984 if (r < 0) {
985 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
986 exit(1);
987 }
296af7c9 988
13618e05 989 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
990
991 /* signal CPU creation */
61a46217 992 cpu->created = true;
296af7c9
BS
993 qemu_cond_signal(&qemu_cpu_cond);
994
4c055ab5 995 do {
a1fcaa73 996 if (cpu_can_run(cpu)) {
1458c363 997 r = kvm_cpu_exec(cpu);
83f338f7 998 if (r == EXCP_DEBUG) {
91325046 999 cpu_handle_guest_debug(cpu);
83f338f7 1000 }
0ab07c62 1001 }
fd529e8f 1002 qemu_kvm_wait_io_event(cpu);
4c055ab5 1003 } while (!cpu->unplug || cpu_can_run(cpu));
296af7c9 1004
4c055ab5 1005 qemu_kvm_destroy_vcpu(cpu);
2c579042
BR
1006 cpu->created = false;
1007 qemu_cond_signal(&qemu_cpu_cond);
4c055ab5 1008 qemu_mutex_unlock_iothread();
296af7c9
BS
1009 return NULL;
1010}
1011
c7f0f3b1
AL
1012static void *qemu_dummy_cpu_thread_fn(void *arg)
1013{
1014#ifdef _WIN32
1015 fprintf(stderr, "qtest is not supported under Windows\n");
1016 exit(1);
1017#else
10a9021d 1018 CPUState *cpu = arg;
c7f0f3b1
AL
1019 sigset_t waitset;
1020 int r;
1021
ab28bd23
PB
1022 rcu_register_thread();
1023
c7f0f3b1 1024 qemu_mutex_lock_iothread();
814e612e 1025 qemu_thread_get_self(cpu->thread);
9f09e18a 1026 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1027 cpu->can_do_io = 1;
c7f0f3b1
AL
1028
1029 sigemptyset(&waitset);
1030 sigaddset(&waitset, SIG_IPI);
1031
1032 /* signal CPU creation */
61a46217 1033 cpu->created = true;
c7f0f3b1
AL
1034 qemu_cond_signal(&qemu_cpu_cond);
1035
4917cf44 1036 current_cpu = cpu;
c7f0f3b1 1037 while (1) {
4917cf44 1038 current_cpu = NULL;
c7f0f3b1
AL
1039 qemu_mutex_unlock_iothread();
1040 do {
1041 int sig;
1042 r = sigwait(&waitset, &sig);
1043 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1044 if (r == -1) {
1045 perror("sigwait");
1046 exit(1);
1047 }
1048 qemu_mutex_lock_iothread();
4917cf44 1049 current_cpu = cpu;
509a0d78 1050 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1051 }
1052
1053 return NULL;
1054#endif
1055}
1056
bdb7ca67
JK
1057static void tcg_exec_all(void);
1058
7e97cd88 1059static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1060{
c3586ba7 1061 CPUState *cpu = arg;
4c055ab5 1062 CPUState *remove_cpu = NULL;
296af7c9 1063
ab28bd23
PB
1064 rcu_register_thread();
1065
2e7f7a3c 1066 qemu_mutex_lock_iothread();
814e612e 1067 qemu_thread_get_self(cpu->thread);
296af7c9 1068
38fcbd3f
AF
1069 CPU_FOREACH(cpu) {
1070 cpu->thread_id = qemu_get_thread_id();
1071 cpu->created = true;
626cf8f4 1072 cpu->can_do_io = 1;
38fcbd3f 1073 }
296af7c9
BS
1074 qemu_cond_signal(&qemu_cpu_cond);
1075
fa7d1867 1076 /* wait for initial kick-off after machine start */
c28e399c 1077 while (first_cpu->stopped) {
d5f8d613 1078 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1079
1080 /* process any pending work */
bdc44640 1081 CPU_FOREACH(cpu) {
182735ef 1082 qemu_wait_io_event_common(cpu);
8e564b4e 1083 }
0ab07c62 1084 }
296af7c9 1085
21618b3e 1086 /* process any pending work */
aed807c8 1087 atomic_mb_set(&exit_request, 1);
21618b3e 1088
296af7c9 1089 while (1) {
bdb7ca67 1090 tcg_exec_all();
ac70aafc
AB
1091
1092 if (use_icount) {
40daca54 1093 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1094
1095 if (deadline == 0) {
40daca54 1096 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1097 }
3b2319a3 1098 }
d5f8d613 1099 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
4c055ab5
GZ
1100 CPU_FOREACH(cpu) {
1101 if (cpu->unplug && !cpu_can_run(cpu)) {
1102 remove_cpu = cpu;
1103 break;
1104 }
1105 }
1106 if (remove_cpu) {
1107 qemu_tcg_destroy_vcpu(remove_cpu);
2c579042
BR
1108 cpu->created = false;
1109 qemu_cond_signal(&qemu_cpu_cond);
4c055ab5
GZ
1110 remove_cpu = NULL;
1111 }
296af7c9
BS
1112 }
1113
1114 return NULL;
1115}
1116
2ff09a40 1117static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1118{
1119#ifndef _WIN32
1120 int err;
1121
e0c38211
PB
1122 if (cpu->thread_kicked) {
1123 return;
9102deda 1124 }
e0c38211 1125 cpu->thread_kicked = true;
814e612e 1126 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1127 if (err) {
1128 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1129 exit(1);
1130 }
1131#else /* _WIN32 */
e0c38211
PB
1132 abort();
1133#endif
1134}
ed9164a3 1135
e0c38211
PB
1136static void qemu_cpu_kick_no_halt(void)
1137{
1138 CPUState *cpu;
1139 /* Ensure whatever caused the exit has reached the CPU threads before
1140 * writing exit_request.
1141 */
1142 atomic_mb_set(&exit_request, 1);
1143 cpu = atomic_mb_read(&tcg_current_cpu);
1144 if (cpu) {
1145 cpu_exit(cpu);
cc015e9a 1146 }
cc015e9a
PB
1147}
1148
c08d7424 1149void qemu_cpu_kick(CPUState *cpu)
296af7c9 1150{
f5c121b8 1151 qemu_cond_broadcast(cpu->halt_cond);
e0c38211
PB
1152 if (tcg_enabled()) {
1153 qemu_cpu_kick_no_halt();
1154 } else {
1155 qemu_cpu_kick_thread(cpu);
1156 }
296af7c9
BS
1157}
1158
46d62fac 1159void qemu_cpu_kick_self(void)
296af7c9 1160{
4917cf44 1161 assert(current_cpu);
9102deda 1162 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1163}
1164
60e82579 1165bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1166{
814e612e 1167 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1168}
1169
79e2b9ae 1170bool qemu_in_vcpu_thread(void)
aa723c23 1171{
4917cf44 1172 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1173}
1174
afbe7053
PB
/*
 * Per-thread flag: set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread().  Static storage starts out as false.
 */
static __thread bool iothread_locked;

/* Report whether the calling thread has the iothread lock flag set. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1181
296af7c9
BS
1182void qemu_mutex_lock_iothread(void)
1183{
21618b3e 1184 atomic_inc(&iothread_requesting_mutex);
2e7f7a3c
PB
1185 /* In the simple case there is no need to bump the VCPU thread out of
1186 * TCG code execution.
1187 */
1188 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
46036b24 1189 !first_cpu || !first_cpu->created) {
296af7c9 1190 qemu_mutex_lock(&qemu_global_mutex);
21618b3e 1191 atomic_dec(&iothread_requesting_mutex);
1a28cac3 1192 } else {
1a28cac3 1193 if (qemu_mutex_trylock(&qemu_global_mutex)) {
e0c38211 1194 qemu_cpu_kick_no_halt();
1a28cac3
MT
1195 qemu_mutex_lock(&qemu_global_mutex);
1196 }
6b49809c 1197 atomic_dec(&iothread_requesting_mutex);
46daff13 1198 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 1199 }
afbe7053 1200 iothread_locked = true;
296af7c9
BS
1201}
1202
1203void qemu_mutex_unlock_iothread(void)
1204{
afbe7053 1205 iothread_locked = false;
296af7c9
BS
1206 qemu_mutex_unlock(&qemu_global_mutex);
1207}
1208
1209static int all_vcpus_paused(void)
1210{
bdc44640 1211 CPUState *cpu;
296af7c9 1212
bdc44640 1213 CPU_FOREACH(cpu) {
182735ef 1214 if (!cpu->stopped) {
296af7c9 1215 return 0;
0ab07c62 1216 }
296af7c9
BS
1217 }
1218
1219 return 1;
1220}
1221
1222void pause_all_vcpus(void)
1223{
bdc44640 1224 CPUState *cpu;
296af7c9 1225
40daca54 1226 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1227 CPU_FOREACH(cpu) {
182735ef
AF
1228 cpu->stop = true;
1229 qemu_cpu_kick(cpu);
296af7c9
BS
1230 }
1231
aa723c23 1232 if (qemu_in_vcpu_thread()) {
d798e974
JK
1233 cpu_stop_current();
1234 if (!kvm_enabled()) {
bdc44640 1235 CPU_FOREACH(cpu) {
182735ef
AF
1236 cpu->stop = false;
1237 cpu->stopped = true;
d798e974
JK
1238 }
1239 return;
1240 }
1241 }
1242
296af7c9 1243 while (!all_vcpus_paused()) {
be7d6c57 1244 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1245 CPU_FOREACH(cpu) {
182735ef 1246 qemu_cpu_kick(cpu);
296af7c9
BS
1247 }
1248 }
1249}
1250
2993683b
IM
1251void cpu_resume(CPUState *cpu)
1252{
1253 cpu->stop = false;
1254 cpu->stopped = false;
1255 qemu_cpu_kick(cpu);
1256}
1257
296af7c9
BS
1258void resume_all_vcpus(void)
1259{
bdc44640 1260 CPUState *cpu;
296af7c9 1261
40daca54 1262 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1263 CPU_FOREACH(cpu) {
182735ef 1264 cpu_resume(cpu);
296af7c9
BS
1265 }
1266}
1267
4c055ab5
GZ
1268void cpu_remove(CPUState *cpu)
1269{
1270 cpu->stop = true;
1271 cpu->unplug = true;
1272 qemu_cpu_kick(cpu);
1273}
1274
2c579042
BR
1275void cpu_remove_sync(CPUState *cpu)
1276{
1277 cpu_remove(cpu);
1278 while (cpu->created) {
1279 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1280 }
1281}
1282
4900116e
DDAG
1283/* For temporary buffers for forming a name */
1284#define VCPU_THREAD_NAME_SIZE 16
1285
e5ab30a2 1286static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1287{
4900116e 1288 char thread_name[VCPU_THREAD_NAME_SIZE];
d5f8d613
FK
1289 static QemuCond *tcg_halt_cond;
1290 static QemuThread *tcg_cpu_thread;
4900116e 1291
296af7c9
BS
1292 /* share a single thread for all cpus with TCG */
1293 if (!tcg_cpu_thread) {
814e612e 1294 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1295 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1296 qemu_cond_init(cpu->halt_cond);
1297 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1298 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1299 cpu->cpu_index);
1300 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1301 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1302#ifdef _WIN32
814e612e 1303 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1304#endif
61a46217 1305 while (!cpu->created) {
18a85728 1306 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1307 }
814e612e 1308 tcg_cpu_thread = cpu->thread;
296af7c9 1309 } else {
814e612e 1310 cpu->thread = tcg_cpu_thread;
f5c121b8 1311 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1312 }
1313}
1314
48a106bd 1315static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1316{
4900116e
DDAG
1317 char thread_name[VCPU_THREAD_NAME_SIZE];
1318
814e612e 1319 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1320 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1321 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1322 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1323 cpu->cpu_index);
1324 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1325 cpu, QEMU_THREAD_JOINABLE);
61a46217 1326 while (!cpu->created) {
18a85728 1327 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1328 }
296af7c9
BS
1329}
1330
10a9021d 1331static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1332{
4900116e
DDAG
1333 char thread_name[VCPU_THREAD_NAME_SIZE];
1334
814e612e 1335 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1336 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1337 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1338 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1339 cpu->cpu_index);
1340 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1341 QEMU_THREAD_JOINABLE);
61a46217 1342 while (!cpu->created) {
c7f0f3b1
AL
1343 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1344 }
1345}
1346
c643bed9 1347void qemu_init_vcpu(CPUState *cpu)
296af7c9 1348{
ce3960eb
AF
1349 cpu->nr_cores = smp_cores;
1350 cpu->nr_threads = smp_threads;
f324e766 1351 cpu->stopped = true;
56943e8c
PM
1352
1353 if (!cpu->as) {
1354 /* If the target cpu hasn't set up any address spaces itself,
1355 * give it the default one.
1356 */
6731d864
PC
1357 AddressSpace *as = address_space_init_shareable(cpu->memory,
1358 "cpu-memory");
12ebc9a7 1359 cpu->num_ases = 1;
6731d864 1360 cpu_address_space_init(cpu, as, 0);
56943e8c
PM
1361 }
1362
0ab07c62 1363 if (kvm_enabled()) {
48a106bd 1364 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1365 } else if (tcg_enabled()) {
e5ab30a2 1366 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1367 } else {
10a9021d 1368 qemu_dummy_start_vcpu(cpu);
0ab07c62 1369 }
296af7c9
BS
1370}
1371
b4a3d965 1372void cpu_stop_current(void)
296af7c9 1373{
4917cf44
AF
1374 if (current_cpu) {
1375 current_cpu->stop = false;
1376 current_cpu->stopped = true;
1377 cpu_exit(current_cpu);
96bce683 1378 qemu_cond_broadcast(&qemu_pause_cond);
b4a3d965 1379 }
296af7c9
BS
1380}
1381
56983463 1382int vm_stop(RunState state)
296af7c9 1383{
aa723c23 1384 if (qemu_in_vcpu_thread()) {
74892d24 1385 qemu_system_vmstop_request_prepare();
1dfb4dd9 1386 qemu_system_vmstop_request(state);
296af7c9
BS
1387 /*
1388 * FIXME: should not return to device code in case
1389 * vm_stop() has been requested.
1390 */
b4a3d965 1391 cpu_stop_current();
56983463 1392 return 0;
296af7c9 1393 }
56983463
KW
1394
1395 return do_vm_stop(state);
296af7c9
BS
1396}
1397
8a9236f1
LC
1398/* does a state transition even if the VM is already stopped,
1399 current state is forgotten forever */
56983463 1400int vm_stop_force_state(RunState state)
8a9236f1
LC
1401{
1402 if (runstate_is_running()) {
56983463 1403 return vm_stop(state);
8a9236f1
LC
1404 } else {
1405 runstate_set(state);
b2780d32
WC
1406
1407 bdrv_drain_all();
594a45ce
KW
1408 /* Make sure to return an error if the flush in a previous vm_stop()
1409 * failed. */
da31d594 1410 return blk_flush_all();
8a9236f1
LC
1411 }
1412}
1413
8b427044
PD
1414static int64_t tcg_get_icount_limit(void)
1415{
1416 int64_t deadline;
1417
1418 if (replay_mode != REPLAY_MODE_PLAY) {
1419 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1420
1421 /* Maintain prior (possibly buggy) behaviour where if no deadline
1422 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1423 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1424 * nanoseconds.
1425 */
1426 if ((deadline < 0) || (deadline > INT32_MAX)) {
1427 deadline = INT32_MAX;
1428 }
1429
1430 return qemu_icount_round(deadline);
1431 } else {
1432 return replay_get_instructions();
1433 }
1434}
1435
3d57f789 1436static int tcg_cpu_exec(CPUState *cpu)
296af7c9
BS
1437{
1438 int ret;
1439#ifdef CONFIG_PROFILER
1440 int64_t ti;
1441#endif
1442
1443#ifdef CONFIG_PROFILER
1444 ti = profile_getclock();
1445#endif
1446 if (use_icount) {
1447 int64_t count;
1448 int decr;
c96778bb
FK
1449 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1450 + cpu->icount_extra);
28ecfd7a 1451 cpu->icount_decr.u16.low = 0;
efee7340 1452 cpu->icount_extra = 0;
8b427044 1453 count = tcg_get_icount_limit();
c96778bb 1454 timers_state.qemu_icount += count;
296af7c9
BS
1455 decr = (count > 0xffff) ? 0xffff : count;
1456 count -= decr;
28ecfd7a 1457 cpu->icount_decr.u16.low = decr;
efee7340 1458 cpu->icount_extra = count;
296af7c9 1459 }
ab129972 1460 cpu_exec_start(cpu);
ea3e9847 1461 ret = cpu_exec(cpu);
ab129972 1462 cpu_exec_end(cpu);
296af7c9 1463#ifdef CONFIG_PROFILER
89d5cbdd 1464 tcg_time += profile_getclock() - ti;
296af7c9
BS
1465#endif
1466 if (use_icount) {
1467 /* Fold pending instructions back into the
1468 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1469 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1470 + cpu->icount_extra);
28ecfd7a 1471 cpu->icount_decr.u32 = 0;
efee7340 1472 cpu->icount_extra = 0;
8b427044 1473 replay_account_executed_instructions();
296af7c9
BS
1474 }
1475 return ret;
1476}
1477
bdb7ca67 1478static void tcg_exec_all(void)
296af7c9 1479{
9a36085b
JK
1480 int r;
1481
40daca54 1482 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
e76d1798 1483 qemu_account_warp_timer();
ab33fcda 1484
0ab07c62 1485 if (next_cpu == NULL) {
296af7c9 1486 next_cpu = first_cpu;
0ab07c62 1487 }
bdc44640 1488 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef 1489 CPUState *cpu = next_cpu;
296af7c9 1490
40daca54 1491 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1492 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1493
a1fcaa73 1494 if (cpu_can_run(cpu)) {
3d57f789 1495 r = tcg_cpu_exec(cpu);
9a36085b 1496 if (r == EXCP_DEBUG) {
91325046 1497 cpu_handle_guest_debug(cpu);
3c638d06
JK
1498 break;
1499 }
f324e766 1500 } else if (cpu->stop || cpu->stopped) {
4c055ab5
GZ
1501 if (cpu->unplug) {
1502 next_cpu = CPU_NEXT(cpu);
1503 }
296af7c9
BS
1504 break;
1505 }
1506 }
aed807c8
PB
1507
1508 /* Pairs with smp_wmb in qemu_cpu_kick. */
1509 atomic_mb_set(&exit_request, 0);
296af7c9
BS
1510}
1511
9a78eead 1512void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1513{
1514 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1515#if defined(cpu_list)
1516 cpu_list(f, cpu_fprintf);
262353cb
BS
1517#endif
1518}
de0b36b6
LC
1519
1520CpuInfoList *qmp_query_cpus(Error **errp)
1521{
1522 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1523 CPUState *cpu;
de0b36b6 1524
bdc44640 1525 CPU_FOREACH(cpu) {
de0b36b6 1526 CpuInfoList *info;
182735ef
AF
1527#if defined(TARGET_I386)
1528 X86CPU *x86_cpu = X86_CPU(cpu);
1529 CPUX86State *env = &x86_cpu->env;
1530#elif defined(TARGET_PPC)
1531 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1532 CPUPPCState *env = &ppc_cpu->env;
1533#elif defined(TARGET_SPARC)
1534 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1535 CPUSPARCState *env = &sparc_cpu->env;
1536#elif defined(TARGET_MIPS)
1537 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1538 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1539#elif defined(TARGET_TRICORE)
1540 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1541 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1542#endif
de0b36b6 1543
cb446eca 1544 cpu_synchronize_state(cpu);
de0b36b6
LC
1545
1546 info = g_malloc0(sizeof(*info));
1547 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1548 info->value->CPU = cpu->cpu_index;
182735ef 1549 info->value->current = (cpu == first_cpu);
259186a7 1550 info->value->halted = cpu->halted;
58f88d4b 1551 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
9f09e18a 1552 info->value->thread_id = cpu->thread_id;
de0b36b6 1553#if defined(TARGET_I386)
86f4b687 1554 info->value->arch = CPU_INFO_ARCH_X86;
544a3731 1555 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
de0b36b6 1556#elif defined(TARGET_PPC)
86f4b687 1557 info->value->arch = CPU_INFO_ARCH_PPC;
544a3731 1558 info->value->u.ppc.nip = env->nip;
de0b36b6 1559#elif defined(TARGET_SPARC)
86f4b687 1560 info->value->arch = CPU_INFO_ARCH_SPARC;
544a3731
EB
1561 info->value->u.q_sparc.pc = env->pc;
1562 info->value->u.q_sparc.npc = env->npc;
de0b36b6 1563#elif defined(TARGET_MIPS)
86f4b687 1564 info->value->arch = CPU_INFO_ARCH_MIPS;
544a3731 1565 info->value->u.q_mips.PC = env->active_tc.PC;
48e06fe0 1566#elif defined(TARGET_TRICORE)
86f4b687 1567 info->value->arch = CPU_INFO_ARCH_TRICORE;
544a3731 1568 info->value->u.tricore.PC = env->PC;
86f4b687
EB
1569#else
1570 info->value->arch = CPU_INFO_ARCH_OTHER;
de0b36b6
LC
1571#endif
1572
1573 /* XXX: waiting for the qapi to support GSList */
1574 if (!cur_item) {
1575 head = cur_item = info;
1576 } else {
1577 cur_item->next = info;
1578 cur_item = info;
1579 }
1580 }
1581
1582 return head;
1583}
0cfd6a9a
LC
1584
1585void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1586 bool has_cpu, int64_t cpu_index, Error **errp)
1587{
1588 FILE *f;
1589 uint32_t l;
55e5c285 1590 CPUState *cpu;
0cfd6a9a 1591 uint8_t buf[1024];
0dc9daf0 1592 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1593
1594 if (!has_cpu) {
1595 cpu_index = 0;
1596 }
1597
151d1322
AF
1598 cpu = qemu_get_cpu(cpu_index);
1599 if (cpu == NULL) {
c6bd8c70
MA
1600 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1601 "a CPU number");
0cfd6a9a
LC
1602 return;
1603 }
1604
1605 f = fopen(filename, "wb");
1606 if (!f) {
618da851 1607 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1608 return;
1609 }
1610
1611 while (size != 0) {
1612 l = sizeof(buf);
1613 if (l > size)
1614 l = size;
2f4d0f59 1615 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1616 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1617 " specified", orig_addr, orig_size);
2f4d0f59
AK
1618 goto exit;
1619 }
0cfd6a9a 1620 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1621 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1622 goto exit;
1623 }
1624 addr += l;
1625 size -= l;
1626 }
1627
1628exit:
1629 fclose(f);
1630}
6d3962bf
LC
1631
1632void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1633 Error **errp)
1634{
1635 FILE *f;
1636 uint32_t l;
1637 uint8_t buf[1024];
1638
1639 f = fopen(filename, "wb");
1640 if (!f) {
618da851 1641 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1642 return;
1643 }
1644
1645 while (size != 0) {
1646 l = sizeof(buf);
1647 if (l > size)
1648 l = size;
eb6282f2 1649 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1650 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1651 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1652 goto exit;
1653 }
1654 addr += l;
1655 size -= l;
1656 }
1657
1658exit:
1659 fclose(f);
1660}
ab49ab5c
LC
1661
1662void qmp_inject_nmi(Error **errp)
1663{
9cb805fd 1664 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c 1665}
27498bef
ST
1666
1667void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1668{
1669 if (!use_icount) {
1670 return;
1671 }
1672
1673 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1674 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1675 if (icount_align_option) {
1676 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1677 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1678 } else {
1679 cpu_fprintf(f, "Max guest delay NA\n");
1680 cpu_fprintf(f, "Max guest advance NA\n");
1681 }
1682}