4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "qmp-commands.h"
41 #include "exec/exec-all.h"
43 #include "qemu/thread.h"
44 #include "sysemu/cpus.h"
45 #include "sysemu/qtest.h"
46 #include "qemu/main-loop.h"
47 #include "qemu/bitmap.h"
48 #include "qemu/seqlock.h"
50 #include "qapi-event.h"
52 #include "sysemu/replay.h"
56 #include <sys/prctl.h>
59 #define PR_MCE_KILL 33
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
70 #endif /* CONFIG_LINUX */
75 /* vcpu throttling controls */
76 static QEMUTimer
*throttle_timer
;
77 static unsigned int throttle_percentage
;
79 #define CPU_THROTTLE_PCT_MIN 1
80 #define CPU_THROTTLE_PCT_MAX 99
81 #define CPU_THROTTLE_TIMESLICE_NS 10000000
83 bool cpu_is_stopped(CPUState
*cpu
)
85 return cpu
->stopped
|| !runstate_is_running();
88 static bool cpu_thread_is_idle(CPUState
*cpu
)
90 if (cpu
->stop
|| cpu
->queued_work_first
) {
93 if (cpu_is_stopped(cpu
)) {
96 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
97 kvm_halt_in_kernel()) {
103 static bool all_cpu_threads_idle(void)
108 if (!cpu_thread_is_idle(cpu
)) {
115 /***********************************************************/
116 /* guest cycle counter */
118 /* Protected by TimersState seqlock */
120 static bool icount_sleep
= true;
121 static int64_t vm_clock_warp_start
= -1;
122 /* Conversion factor from emulated instructions to virtual clock ticks. */
123 static int icount_time_shift
;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125 #define MAX_ICOUNT_SHIFT 10
127 static QEMUTimer
*icount_rt_timer
;
128 static QEMUTimer
*icount_vm_timer
;
129 static QEMUTimer
*icount_warp_timer
;
131 typedef struct TimersState
{
132 /* Protected by BQL. */
133 int64_t cpu_ticks_prev
;
134 int64_t cpu_ticks_offset
;
136 /* cpu_clock_offset can be read out of BQL, so protect it with
139 QemuSeqLock vm_clock_seqlock
;
140 int64_t cpu_clock_offset
;
141 int32_t cpu_ticks_enabled
;
144 /* Compensate for varying guest execution speed. */
145 int64_t qemu_icount_bias
;
146 /* Only written by TCG thread */
150 static TimersState timers_state
;
154 * We default to false if we know other options have been enabled
155 * which are currently incompatible with MTTCG. Otherwise when each
156 * guest (target) has been updated to support:
157 * - atomic instructions
158 * - memory ordering primitives (barriers)
159 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
161 * Once a guest architecture has been converted to the new primitives
162 * there are two remaining limitations to check.
164 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
165 * - The host must have a stronger memory order than the guest
167 * It may be possible in future to support strong guests on weak hosts
168 * but that will require tagging all load/stores in a guest with their
169 * implicit memory order requirements which would likely slow things
173 static bool check_tcg_memory_orders_compatible(void)
175 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
176 return (TCG_GUEST_DEFAULT_MO
& ~TCG_TARGET_DEFAULT_MO
) == 0;
182 static bool default_mttcg_enabled(void)
184 if (use_icount
|| TCG_OVERSIZED_GUEST
) {
187 #ifdef TARGET_SUPPORTS_MTTCG
188 return check_tcg_memory_orders_compatible();
195 void qemu_tcg_configure(QemuOpts
*opts
, Error
**errp
)
197 const char *t
= qemu_opt_get(opts
, "thread");
199 if (strcmp(t
, "multi") == 0) {
200 if (TCG_OVERSIZED_GUEST
) {
201 error_setg(errp
, "No MTTCG when guest word size > hosts");
202 } else if (use_icount
) {
203 error_setg(errp
, "No MTTCG when icount is enabled");
205 #ifndef TARGET_SUPPORTS_MTTCG
206 error_report("Guest not yet converted to MTTCG - "
207 "you may get unexpected results");
209 if (!check_tcg_memory_orders_compatible()) {
210 error_report("Guest expects a stronger memory ordering "
211 "than the host provides");
212 error_printf("This may cause strange/hard to debug errors\n");
214 mttcg_enabled
= true;
216 } else if (strcmp(t
, "single") == 0) {
217 mttcg_enabled
= false;
219 error_setg(errp
, "Invalid 'thread' setting %s", t
);
222 mttcg_enabled
= default_mttcg_enabled();
226 int64_t cpu_get_icount_raw(void)
229 CPUState
*cpu
= current_cpu
;
231 icount
= timers_state
.qemu_icount
;
232 if (cpu
&& cpu
->running
) {
233 if (!cpu
->can_do_io
) {
234 fprintf(stderr
, "Bad icount read\n");
237 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
242 /* Return the virtual CPU time, based on the instruction counter. */
243 static int64_t cpu_get_icount_locked(void)
245 int64_t icount
= cpu_get_icount_raw();
246 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
249 int64_t cpu_get_icount(void)
255 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
256 icount
= cpu_get_icount_locked();
257 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
262 int64_t cpu_icount_to_ns(int64_t icount
)
264 return icount
<< icount_time_shift
;
267 /* return the time elapsed in VM between vm_start and vm_stop. Unless
268 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
271 * Caller must hold the BQL
273 int64_t cpu_get_ticks(void)
278 return cpu_get_icount();
281 ticks
= timers_state
.cpu_ticks_offset
;
282 if (timers_state
.cpu_ticks_enabled
) {
283 ticks
+= cpu_get_host_ticks();
286 if (timers_state
.cpu_ticks_prev
> ticks
) {
287 /* Note: non increasing ticks may happen if the host uses
289 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
290 ticks
= timers_state
.cpu_ticks_prev
;
293 timers_state
.cpu_ticks_prev
= ticks
;
297 static int64_t cpu_get_clock_locked(void)
301 time
= timers_state
.cpu_clock_offset
;
302 if (timers_state
.cpu_ticks_enabled
) {
309 /* Return the monotonic time elapsed in VM, i.e.,
310 * the time between vm_start and vm_stop
312 int64_t cpu_get_clock(void)
318 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
319 ti
= cpu_get_clock_locked();
320 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
325 /* enable cpu_get_ticks()
326 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
328 void cpu_enable_ticks(void)
330 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
331 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
332 if (!timers_state
.cpu_ticks_enabled
) {
333 timers_state
.cpu_ticks_offset
-= cpu_get_host_ticks();
334 timers_state
.cpu_clock_offset
-= get_clock();
335 timers_state
.cpu_ticks_enabled
= 1;
337 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
340 /* disable cpu_get_ticks() : the clock is stopped. You must not call
341 * cpu_get_ticks() after that.
342 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
344 void cpu_disable_ticks(void)
346 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
347 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
348 if (timers_state
.cpu_ticks_enabled
) {
349 timers_state
.cpu_ticks_offset
+= cpu_get_host_ticks();
350 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
351 timers_state
.cpu_ticks_enabled
= 0;
353 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
356 /* Correlation between real and virtual time is always going to be
357 fairly approximate, so ignore small variation.
358 When the guest is idle real and virtual time will be aligned in
360 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
362 static void icount_adjust(void)
368 /* Protected by TimersState mutex. */
369 static int64_t last_delta
;
371 /* If the VM is not running, then do nothing. */
372 if (!runstate_is_running()) {
376 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
377 cur_time
= cpu_get_clock_locked();
378 cur_icount
= cpu_get_icount_locked();
380 delta
= cur_icount
- cur_time
;
381 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
383 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
384 && icount_time_shift
> 0) {
385 /* The guest is getting too far ahead. Slow time down. */
389 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
390 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
391 /* The guest is getting too far behind. Speed time up. */
395 timers_state
.qemu_icount_bias
= cur_icount
396 - (timers_state
.qemu_icount
<< icount_time_shift
);
397 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
400 static void icount_adjust_rt(void *opaque
)
402 timer_mod(icount_rt_timer
,
403 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
407 static void icount_adjust_vm(void *opaque
)
409 timer_mod(icount_vm_timer
,
410 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
411 NANOSECONDS_PER_SECOND
/ 10);
415 static int64_t qemu_icount_round(int64_t count
)
417 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
420 static void icount_warp_rt(void)
425 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
426 * changes from -1 to another value, so the race here is okay.
429 seq
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
430 warp_start
= vm_clock_warp_start
;
431 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, seq
));
433 if (warp_start
== -1) {
437 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
438 if (runstate_is_running()) {
439 int64_t clock
= REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT
,
440 cpu_get_clock_locked());
443 warp_delta
= clock
- vm_clock_warp_start
;
444 if (use_icount
== 2) {
446 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
447 * far ahead of real time.
449 int64_t cur_icount
= cpu_get_icount_locked();
450 int64_t delta
= clock
- cur_icount
;
451 warp_delta
= MIN(warp_delta
, delta
);
453 timers_state
.qemu_icount_bias
+= warp_delta
;
455 vm_clock_warp_start
= -1;
456 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
458 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
459 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
463 static void icount_timer_cb(void *opaque
)
465 /* No need for a checkpoint because the timer already synchronizes
466 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
471 void qtest_clock_warp(int64_t dest
)
473 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
474 AioContext
*aio_context
;
475 assert(qtest_enabled());
476 aio_context
= qemu_get_aio_context();
477 while (clock
< dest
) {
478 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
479 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
481 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
482 timers_state
.qemu_icount_bias
+= warp
;
483 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
485 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
486 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
487 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
489 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
492 void qemu_start_warp_timer(void)
501 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
502 * do not fire, so computing the deadline does not make sense.
504 if (!runstate_is_running()) {
508 /* warp clock deterministically in record/replay mode */
509 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START
)) {
513 if (!all_cpu_threads_idle()) {
517 if (qtest_enabled()) {
518 /* When testing, qtest commands advance icount. */
522 /* We want to use the earliest deadline from ALL vm_clocks */
523 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
524 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
526 static bool notified
;
527 if (!icount_sleep
&& !notified
) {
528 error_report("WARNING: icount sleep disabled and no active timers");
536 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
537 * sleep. Otherwise, the CPU might be waiting for a future timer
538 * interrupt to wake it up, but the interrupt never comes because
539 * the vCPU isn't running any insns and thus doesn't advance the
540 * QEMU_CLOCK_VIRTUAL.
544 * We never let VCPUs sleep in no sleep icount mode.
545 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
546 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
547 * It is useful when we want a deterministic execution time,
548 * isolated from host latencies.
550 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
551 timers_state
.qemu_icount_bias
+= deadline
;
552 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
553 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
556 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
557 * "real" time, (related to the time left until the next event) has
558 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
559 * This keeps the warps from being visible externally; for example,
560 * you will not be sending network packets continuously instead of
563 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
564 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
565 vm_clock_warp_start
= clock
;
567 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
568 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
570 } else if (deadline
== 0) {
571 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
575 static void qemu_account_warp_timer(void)
577 if (!use_icount
|| !icount_sleep
) {
581 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
582 * do not fire, so computing the deadline does not make sense.
584 if (!runstate_is_running()) {
588 /* warp clock deterministically in record/replay mode */
589 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT
)) {
593 timer_del(icount_warp_timer
);
597 static bool icount_state_needed(void *opaque
)
603 * This is a subsection for icount migration.
605 static const VMStateDescription icount_vmstate_timers
= {
606 .name
= "timer/icount",
608 .minimum_version_id
= 1,
609 .needed
= icount_state_needed
,
610 .fields
= (VMStateField
[]) {
611 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
612 VMSTATE_INT64(qemu_icount
, TimersState
),
613 VMSTATE_END_OF_LIST()
617 static const VMStateDescription vmstate_timers
= {
620 .minimum_version_id
= 1,
621 .fields
= (VMStateField
[]) {
622 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
623 VMSTATE_INT64(dummy
, TimersState
),
624 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
625 VMSTATE_END_OF_LIST()
627 .subsections
= (const VMStateDescription
*[]) {
628 &icount_vmstate_timers
,
633 static void cpu_throttle_thread(CPUState
*cpu
, run_on_cpu_data opaque
)
636 double throttle_ratio
;
639 if (!cpu_throttle_get_percentage()) {
643 pct
= (double)cpu_throttle_get_percentage()/100;
644 throttle_ratio
= pct
/ (1 - pct
);
645 sleeptime_ns
= (long)(throttle_ratio
* CPU_THROTTLE_TIMESLICE_NS
);
647 qemu_mutex_unlock_iothread();
648 atomic_set(&cpu
->throttle_thread_scheduled
, 0);
649 g_usleep(sleeptime_ns
/ 1000); /* Convert ns to us for usleep call */
650 qemu_mutex_lock_iothread();
653 static void cpu_throttle_timer_tick(void *opaque
)
658 /* Stop the timer if needed */
659 if (!cpu_throttle_get_percentage()) {
663 if (!atomic_xchg(&cpu
->throttle_thread_scheduled
, 1)) {
664 async_run_on_cpu(cpu
, cpu_throttle_thread
,
669 pct
= (double)cpu_throttle_get_percentage()/100;
670 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
671 CPU_THROTTLE_TIMESLICE_NS
/ (1-pct
));
674 void cpu_throttle_set(int new_throttle_pct
)
676 /* Ensure throttle percentage is within valid range */
677 new_throttle_pct
= MIN(new_throttle_pct
, CPU_THROTTLE_PCT_MAX
);
678 new_throttle_pct
= MAX(new_throttle_pct
, CPU_THROTTLE_PCT_MIN
);
680 atomic_set(&throttle_percentage
, new_throttle_pct
);
682 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
683 CPU_THROTTLE_TIMESLICE_NS
);
686 void cpu_throttle_stop(void)
688 atomic_set(&throttle_percentage
, 0);
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    const int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
696 int cpu_throttle_get_percentage(void)
698 return atomic_read(&throttle_percentage
);
701 void cpu_ticks_init(void)
703 seqlock_init(&timers_state
.vm_clock_seqlock
);
704 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
705 throttle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
706 cpu_throttle_timer_tick
, NULL
);
709 void configure_icount(QemuOpts
*opts
, Error
**errp
)
712 char *rem_str
= NULL
;
714 option
= qemu_opt_get(opts
, "shift");
716 if (qemu_opt_get(opts
, "align") != NULL
) {
717 error_setg(errp
, "Please specify shift option when using align");
722 icount_sleep
= qemu_opt_get_bool(opts
, "sleep", true);
724 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
725 icount_timer_cb
, NULL
);
728 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
730 if (icount_align_option
&& !icount_sleep
) {
731 error_setg(errp
, "align=on and sleep=off are incompatible");
733 if (strcmp(option
, "auto") != 0) {
735 icount_time_shift
= strtol(option
, &rem_str
, 0);
736 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
737 error_setg(errp
, "icount: Invalid shift value");
741 } else if (icount_align_option
) {
742 error_setg(errp
, "shift=auto and align=on are incompatible");
743 } else if (!icount_sleep
) {
744 error_setg(errp
, "shift=auto and sleep=off are incompatible");
749 /* 125MIPS seems a reasonable initial guess at the guest speed.
750 It will be corrected fairly quickly anyway. */
751 icount_time_shift
= 3;
753 /* Have both realtime and virtual time triggers for speed adjustment.
754 The realtime trigger catches emulated time passing too slowly,
755 the virtual time trigger catches emulated time passing too fast.
756 Realtime triggers occur even when idle, so use them less frequently
758 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
759 icount_adjust_rt
, NULL
);
760 timer_mod(icount_rt_timer
,
761 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
762 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
763 icount_adjust_vm
, NULL
);
764 timer_mod(icount_vm_timer
,
765 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
766 NANOSECONDS_PER_SECOND
/ 10);
769 /***********************************************************/
770 /* TCG vCPU kick timer
772 * The kick timer is responsible for moving single threaded vCPU
773 * emulation on to the next vCPU. If more than one vCPU is running a
774 * timer event will force a cpu->exit so the next vCPU can get
777 * The timer is removed if all vCPUs are idle and restarted again once
778 * idleness is complete.
781 static QEMUTimer
*tcg_kick_vcpu_timer
;
782 static CPUState
*tcg_current_rr_cpu
;
784 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
786 static inline int64_t qemu_tcg_next_kick(void)
788 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) + TCG_KICK_PERIOD
;
791 /* Kick the currently round-robin scheduled vCPU */
792 static void qemu_cpu_kick_rr_cpu(void)
796 cpu
= atomic_mb_read(&tcg_current_rr_cpu
);
800 } while (cpu
!= atomic_mb_read(&tcg_current_rr_cpu
));
803 static void do_nothing(CPUState
*cpu
, run_on_cpu_data unused
)
807 void qemu_timer_notify_cb(void *opaque
, QEMUClockType type
)
809 if (!use_icount
|| type
!= QEMU_CLOCK_VIRTUAL
) {
814 if (!qemu_in_vcpu_thread() && first_cpu
) {
815 /* qemu_cpu_kick is not enough to kick a halted CPU out of
816 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
817 * causes cpu_thread_is_idle to return false. This way,
818 * handle_icount_deadline can run.
820 async_run_on_cpu(first_cpu
, do_nothing
, RUN_ON_CPU_NULL
);
824 static void kick_tcg_thread(void *opaque
)
826 timer_mod(tcg_kick_vcpu_timer
, qemu_tcg_next_kick());
827 qemu_cpu_kick_rr_cpu();
830 static void start_tcg_kick_timer(void)
832 if (!mttcg_enabled
&& !tcg_kick_vcpu_timer
&& CPU_NEXT(first_cpu
)) {
833 tcg_kick_vcpu_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
834 kick_tcg_thread
, NULL
);
835 timer_mod(tcg_kick_vcpu_timer
, qemu_tcg_next_kick());
839 static void stop_tcg_kick_timer(void)
841 if (tcg_kick_vcpu_timer
) {
842 timer_del(tcg_kick_vcpu_timer
);
843 tcg_kick_vcpu_timer
= NULL
;
847 /***********************************************************/
848 void hw_error(const char *fmt
, ...)
854 fprintf(stderr
, "qemu: hardware error: ");
855 vfprintf(stderr
, fmt
, ap
);
856 fprintf(stderr
, "\n");
858 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
859 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
865 void cpu_synchronize_all_states(void)
870 cpu_synchronize_state(cpu
);
874 void cpu_synchronize_all_post_reset(void)
879 cpu_synchronize_post_reset(cpu
);
883 void cpu_synchronize_all_post_init(void)
888 cpu_synchronize_post_init(cpu
);
892 static int do_vm_stop(RunState state
)
896 if (runstate_is_running()) {
900 vm_state_notify(0, state
);
901 qapi_event_send_stop(&error_abort
);
905 replay_disable_events();
906 ret
= bdrv_flush_all();
911 static bool cpu_can_run(CPUState
*cpu
)
916 if (cpu_is_stopped(cpu
)) {
922 static void cpu_handle_guest_debug(CPUState
*cpu
)
924 gdb_set_stop_cpu(cpu
);
925 qemu_system_debug_request();
930 static void sigbus_reraise(void)
933 struct sigaction action
;
935 memset(&action
, 0, sizeof(action
));
936 action
.sa_handler
= SIG_DFL
;
937 if (!sigaction(SIGBUS
, &action
, NULL
)) {
940 sigaddset(&set
, SIGBUS
);
941 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
943 perror("Failed to re-raise SIGBUS!\n");
947 static void sigbus_handler(int n
, siginfo_t
*siginfo
, void *ctx
)
949 if (siginfo
->si_code
!= BUS_MCEERR_AO
&& siginfo
->si_code
!= BUS_MCEERR_AR
) {
954 /* Called asynchronously in VCPU thread. */
955 if (kvm_on_sigbus_vcpu(current_cpu
, siginfo
->si_code
, siginfo
->si_addr
)) {
959 /* Called synchronously (via signalfd) in main thread. */
960 if (kvm_on_sigbus(siginfo
->si_code
, siginfo
->si_addr
)) {
966 static void qemu_init_sigbus(void)
968 struct sigaction action
;
970 memset(&action
, 0, sizeof(action
));
971 action
.sa_flags
= SA_SIGINFO
;
972 action
.sa_sigaction
= sigbus_handler
;
973 sigaction(SIGBUS
, &action
, NULL
);
975 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
977 #else /* !CONFIG_LINUX */
/* !CONFIG_LINUX variant: no SIGBUS set-up is performed. */
static void qemu_init_sigbus(void)
{
}
981 #endif /* !CONFIG_LINUX */
983 static QemuMutex qemu_global_mutex
;
985 static QemuThread io_thread
;
988 static QemuCond qemu_cpu_cond
;
990 static QemuCond qemu_pause_cond
;
992 void qemu_init_cpu_loop(void)
995 qemu_cond_init(&qemu_cpu_cond
);
996 qemu_cond_init(&qemu_pause_cond
);
997 qemu_mutex_init(&qemu_global_mutex
);
999 qemu_thread_get_self(&io_thread
);
1002 void run_on_cpu(CPUState
*cpu
, run_on_cpu_func func
, run_on_cpu_data data
)
1004 do_run_on_cpu(cpu
, func
, data
, &qemu_global_mutex
);
1007 static void qemu_kvm_destroy_vcpu(CPUState
*cpu
)
1009 if (kvm_destroy_vcpu(cpu
) < 0) {
1010 error_report("kvm_destroy_vcpu failed");
1015 static void qemu_tcg_destroy_vcpu(CPUState
*cpu
)
1019 static void qemu_wait_io_event_common(CPUState
*cpu
)
1021 atomic_mb_set(&cpu
->thread_kicked
, false);
1024 cpu
->stopped
= true;
1025 qemu_cond_broadcast(&qemu_pause_cond
);
1027 process_queued_cpu_work(cpu
);
1030 static bool qemu_tcg_should_sleep(CPUState
*cpu
)
1032 if (mttcg_enabled
) {
1033 return cpu_thread_is_idle(cpu
);
1035 return all_cpu_threads_idle();
1039 static void qemu_tcg_wait_io_event(CPUState
*cpu
)
1041 while (qemu_tcg_should_sleep(cpu
)) {
1042 stop_tcg_kick_timer();
1043 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1046 start_tcg_kick_timer();
1048 qemu_wait_io_event_common(cpu
);
1051 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
1053 while (cpu_thread_is_idle(cpu
)) {
1054 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1057 qemu_wait_io_event_common(cpu
);
1060 static void *qemu_kvm_cpu_thread_fn(void *arg
)
1062 CPUState
*cpu
= arg
;
1065 rcu_register_thread();
1067 qemu_mutex_lock_iothread();
1068 qemu_thread_get_self(cpu
->thread
);
1069 cpu
->thread_id
= qemu_get_thread_id();
1073 r
= kvm_init_vcpu(cpu
);
1075 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
1079 kvm_init_cpu_signals(cpu
);
1081 /* signal CPU creation */
1082 cpu
->created
= true;
1083 qemu_cond_signal(&qemu_cpu_cond
);
1086 if (cpu_can_run(cpu
)) {
1087 r
= kvm_cpu_exec(cpu
);
1088 if (r
== EXCP_DEBUG
) {
1089 cpu_handle_guest_debug(cpu
);
1092 qemu_kvm_wait_io_event(cpu
);
1093 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1095 qemu_kvm_destroy_vcpu(cpu
);
1096 cpu
->created
= false;
1097 qemu_cond_signal(&qemu_cpu_cond
);
1098 qemu_mutex_unlock_iothread();
1102 static void *qemu_dummy_cpu_thread_fn(void *arg
)
1105 fprintf(stderr
, "qtest is not supported under Windows\n");
1108 CPUState
*cpu
= arg
;
1112 rcu_register_thread();
1114 qemu_mutex_lock_iothread();
1115 qemu_thread_get_self(cpu
->thread
);
1116 cpu
->thread_id
= qemu_get_thread_id();
1120 sigemptyset(&waitset
);
1121 sigaddset(&waitset
, SIG_IPI
);
1123 /* signal CPU creation */
1124 cpu
->created
= true;
1125 qemu_cond_signal(&qemu_cpu_cond
);
1128 qemu_mutex_unlock_iothread();
1131 r
= sigwait(&waitset
, &sig
);
1132 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1137 qemu_mutex_lock_iothread();
1138 qemu_wait_io_event_common(cpu
);
1145 static int64_t tcg_get_icount_limit(void)
1149 if (replay_mode
!= REPLAY_MODE_PLAY
) {
1150 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1152 /* Maintain prior (possibly buggy) behaviour where if no deadline
1153 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1154 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1157 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1158 deadline
= INT32_MAX
;
1161 return qemu_icount_round(deadline
);
1163 return replay_get_instructions();
1167 static void handle_icount_deadline(void)
1169 assert(qemu_in_vcpu_thread());
1172 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1174 if (deadline
== 0) {
1175 /* Wake up other AioContexts. */
1176 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1177 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
1182 static void prepare_icount_for_run(CPUState
*cpu
)
1188 /* These should always be cleared by process_icount_data after
1189 * each vCPU execution. However u16.high can be raised
1190 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1192 g_assert(cpu
->icount_decr
.u16
.low
== 0);
1193 g_assert(cpu
->icount_extra
== 0);
1196 count
= tcg_get_icount_limit();
1198 timers_state
.qemu_icount
+= count
;
1199 decr
= (count
> 0xffff) ? 0xffff : count
;
1201 cpu
->icount_decr
.u16
.low
= decr
;
1202 cpu
->icount_extra
= count
;
1206 static void process_icount_data(CPUState
*cpu
)
1209 /* Fold pending instructions back into the
1210 instruction counter, and clear the interrupt flag. */
1211 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1212 + cpu
->icount_extra
);
1214 /* Reset the counters */
1215 cpu
->icount_decr
.u16
.low
= 0;
1216 cpu
->icount_extra
= 0;
1217 replay_account_executed_instructions();
1222 static int tcg_cpu_exec(CPUState
*cpu
)
1225 #ifdef CONFIG_PROFILER
1229 #ifdef CONFIG_PROFILER
1230 ti
= profile_getclock();
1232 qemu_mutex_unlock_iothread();
1233 cpu_exec_start(cpu
);
1234 ret
= cpu_exec(cpu
);
1236 qemu_mutex_lock_iothread();
1237 #ifdef CONFIG_PROFILER
1238 tcg_time
+= profile_getclock() - ti
;
1243 /* Destroy any remaining vCPUs which have been unplugged and have
1246 static void deal_with_unplugged_cpus(void)
1251 if (cpu
->unplug
&& !cpu_can_run(cpu
)) {
1252 qemu_tcg_destroy_vcpu(cpu
);
1253 cpu
->created
= false;
1254 qemu_cond_signal(&qemu_cpu_cond
);
1260 /* Single-threaded TCG
1262 * In the single-threaded case each vCPU is simulated in turn. If
1263 * there is more than a single vCPU we create a simple timer to kick
1264 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1265 * This is done explicitly rather than relying on side-effects
1269 static void *qemu_tcg_rr_cpu_thread_fn(void *arg
)
1271 CPUState
*cpu
= arg
;
1273 rcu_register_thread();
1275 qemu_mutex_lock_iothread();
1276 qemu_thread_get_self(cpu
->thread
);
1279 cpu
->thread_id
= qemu_get_thread_id();
1280 cpu
->created
= true;
1283 qemu_cond_signal(&qemu_cpu_cond
);
1285 /* wait for initial kick-off after machine start */
1286 while (first_cpu
->stopped
) {
1287 qemu_cond_wait(first_cpu
->halt_cond
, &qemu_global_mutex
);
1289 /* process any pending work */
1292 qemu_wait_io_event_common(cpu
);
1296 start_tcg_kick_timer();
1300 /* process any pending work */
1301 cpu
->exit_request
= 1;
1304 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1305 qemu_account_warp_timer();
1307 /* Run the timers here. This is much more efficient than
1308 * waking up the I/O thread and waiting for completion.
1310 handle_icount_deadline();
1316 while (cpu
&& !cpu
->queued_work_first
&& !cpu
->exit_request
) {
1318 atomic_mb_set(&tcg_current_rr_cpu
, cpu
);
1321 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1322 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1324 if (cpu_can_run(cpu
)) {
1327 prepare_icount_for_run(cpu
);
1329 r
= tcg_cpu_exec(cpu
);
1331 process_icount_data(cpu
);
1333 if (r
== EXCP_DEBUG
) {
1334 cpu_handle_guest_debug(cpu
);
1336 } else if (r
== EXCP_ATOMIC
) {
1337 qemu_mutex_unlock_iothread();
1338 cpu_exec_step_atomic(cpu
);
1339 qemu_mutex_lock_iothread();
1342 } else if (cpu
->stop
) {
1344 cpu
= CPU_NEXT(cpu
);
1349 cpu
= CPU_NEXT(cpu
);
1350 } /* while (cpu && !cpu->exit_request).. */
1352 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1353 atomic_set(&tcg_current_rr_cpu
, NULL
);
1355 if (cpu
&& cpu
->exit_request
) {
1356 atomic_mb_set(&cpu
->exit_request
, 0);
1359 qemu_tcg_wait_io_event(cpu
? cpu
: QTAILQ_FIRST(&cpus
));
1360 deal_with_unplugged_cpus();
1366 static void *qemu_hax_cpu_thread_fn(void *arg
)
1368 CPUState
*cpu
= arg
;
1371 qemu_mutex_lock_iothread();
1372 qemu_thread_get_self(cpu
->thread
);
1374 cpu
->thread_id
= qemu_get_thread_id();
1375 cpu
->created
= true;
1380 qemu_cond_signal(&qemu_cpu_cond
);
1383 if (cpu_can_run(cpu
)) {
1384 r
= hax_smp_cpu_exec(cpu
);
1385 if (r
== EXCP_DEBUG
) {
1386 cpu_handle_guest_debug(cpu
);
1390 while (cpu_thread_is_idle(cpu
)) {
1391 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1396 qemu_wait_io_event_common(cpu
);
1402 static void CALLBACK
dummy_apc_func(ULONG_PTR unused
)
1407 /* Multi-threaded TCG
1409 * In the multi-threaded case each vCPU has its own thread. The TLS
1410 * variable current_cpu can be used deep in the code to find the
1411 * current CPUState for a given thread.
1414 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1416 CPUState
*cpu
= arg
;
1418 g_assert(!use_icount
);
1420 rcu_register_thread();
1422 qemu_mutex_lock_iothread();
1423 qemu_thread_get_self(cpu
->thread
);
1425 cpu
->thread_id
= qemu_get_thread_id();
1426 cpu
->created
= true;
1429 qemu_cond_signal(&qemu_cpu_cond
);
1431 /* process any pending work */
1432 cpu
->exit_request
= 1;
1435 if (cpu_can_run(cpu
)) {
1437 r
= tcg_cpu_exec(cpu
);
1440 cpu_handle_guest_debug(cpu
);
1443 /* during start-up the vCPU is reset and the thread is
1444 * kicked several times. If we don't ensure we go back
1445 * to sleep in the halted state we won't cleanly
1446 * start-up when the vCPU is enabled.
1448 * cpu->halted should ensure we sleep in wait_io_event
1450 g_assert(cpu
->halted
);
1453 qemu_mutex_unlock_iothread();
1454 cpu_exec_step_atomic(cpu
);
1455 qemu_mutex_lock_iothread();
1457 /* Ignore everything else? */
1462 atomic_mb_set(&cpu
->exit_request
, 0);
1463 qemu_tcg_wait_io_event(cpu
);
1469 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1474 if (cpu
->thread_kicked
) {
1477 cpu
->thread_kicked
= true;
1478 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1480 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1484 if (!qemu_cpu_is_self(cpu
)) {
1485 if (!QueueUserAPC(dummy_apc_func
, cpu
->hThread
, 0)) {
1486 fprintf(stderr
, "%s: QueueUserAPC failed with error %lu\n",
1487 __func__
, GetLastError());
1494 void qemu_cpu_kick(CPUState
*cpu
)
1496 qemu_cond_broadcast(cpu
->halt_cond
);
1497 if (tcg_enabled()) {
1499 /* NOP unless doing single-thread RR */
1500 qemu_cpu_kick_rr_cpu();
1502 if (hax_enabled()) {
1504 * FIXME: race condition with the exit_request check in
1507 cpu
->exit_request
= 1;
1509 qemu_cpu_kick_thread(cpu
);
1513 void qemu_cpu_kick_self(void)
1515 assert(current_cpu
);
1516 qemu_cpu_kick_thread(current_cpu
);
1519 bool qemu_cpu_is_self(CPUState
*cpu
)
1521 return qemu_thread_is_self(cpu
->thread
);
1524 bool qemu_in_vcpu_thread(void)
1526 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
/*
 * Per-thread flag maintained by qemu_mutex_lock_iothread() and
 * qemu_mutex_unlock_iothread().
 */
static __thread bool iothread_locked = false;

/* Report the calling thread's iothread_locked flag. */
bool qemu_mutex_iothread_locked(void)
{
    bool held = iothread_locked;

    return held;
}
1536 void qemu_mutex_lock_iothread(void)
1538 g_assert(!qemu_mutex_iothread_locked());
1539 qemu_mutex_lock(&qemu_global_mutex
);
1540 iothread_locked
= true;
1543 void qemu_mutex_unlock_iothread(void)
1545 g_assert(qemu_mutex_iothread_locked());
1546 iothread_locked
= false;
1547 qemu_mutex_unlock(&qemu_global_mutex
);
1550 static bool all_vcpus_paused(void)
1555 if (!cpu
->stopped
) {
1563 void pause_all_vcpus(void)
1567 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1573 if (qemu_in_vcpu_thread()) {
1577 while (!all_vcpus_paused()) {
1578 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1585 void cpu_resume(CPUState
*cpu
)
1588 cpu
->stopped
= false;
1592 void resume_all_vcpus(void)
1596 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1602 void cpu_remove(CPUState
*cpu
)
1609 void cpu_remove_sync(CPUState
*cpu
)
1612 while (cpu
->created
) {
1613 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1617 /* For temporary buffers for forming a name */
1618 #define VCPU_THREAD_NAME_SIZE 16
1620 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1622 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1623 static QemuCond
*single_tcg_halt_cond
;
1624 static QemuThread
*single_tcg_cpu_thread
;
1626 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread
) {
1627 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1628 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1629 qemu_cond_init(cpu
->halt_cond
);
1631 if (qemu_tcg_mttcg_enabled()) {
1632 /* create a thread per vCPU with TCG (MTTCG) */
1633 parallel_cpus
= true;
1634 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1637 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1638 cpu
, QEMU_THREAD_JOINABLE
);
1641 /* share a single thread for all cpus with TCG */
1642 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "ALL CPUs/TCG");
1643 qemu_thread_create(cpu
->thread
, thread_name
,
1644 qemu_tcg_rr_cpu_thread_fn
,
1645 cpu
, QEMU_THREAD_JOINABLE
);
1647 single_tcg_halt_cond
= cpu
->halt_cond
;
1648 single_tcg_cpu_thread
= cpu
->thread
;
1651 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1653 while (!cpu
->created
) {
1654 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1657 /* For non-MTTCG cases we share the thread */
1658 cpu
->thread
= single_tcg_cpu_thread
;
1659 cpu
->halt_cond
= single_tcg_halt_cond
;
1663 static void qemu_hax_start_vcpu(CPUState
*cpu
)
1665 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1667 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1668 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1669 qemu_cond_init(cpu
->halt_cond
);
1671 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/HAX",
1673 qemu_thread_create(cpu
->thread
, thread_name
, qemu_hax_cpu_thread_fn
,
1674 cpu
, QEMU_THREAD_JOINABLE
);
1676 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1678 while (!cpu
->created
) {
1679 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1683 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1685 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1687 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1688 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1689 qemu_cond_init(cpu
->halt_cond
);
1690 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1692 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1693 cpu
, QEMU_THREAD_JOINABLE
);
1694 while (!cpu
->created
) {
1695 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1699 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1701 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1703 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1704 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1705 qemu_cond_init(cpu
->halt_cond
);
1706 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1708 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1709 QEMU_THREAD_JOINABLE
);
1710 while (!cpu
->created
) {
1711 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1715 void qemu_init_vcpu(CPUState
*cpu
)
1717 cpu
->nr_cores
= smp_cores
;
1718 cpu
->nr_threads
= smp_threads
;
1719 cpu
->stopped
= true;
1722 /* If the target cpu hasn't set up any address spaces itself,
1723 * give it the default one.
1725 AddressSpace
*as
= address_space_init_shareable(cpu
->memory
,
1728 cpu_address_space_init(cpu
, as
, 0);
1731 if (kvm_enabled()) {
1732 qemu_kvm_start_vcpu(cpu
);
1733 } else if (hax_enabled()) {
1734 qemu_hax_start_vcpu(cpu
);
1735 } else if (tcg_enabled()) {
1736 qemu_tcg_init_vcpu(cpu
);
1738 qemu_dummy_start_vcpu(cpu
);
1742 void cpu_stop_current(void)
1745 current_cpu
->stop
= false;
1746 current_cpu
->stopped
= true;
1747 cpu_exit(current_cpu
);
1748 qemu_cond_broadcast(&qemu_pause_cond
);
1752 int vm_stop(RunState state
)
1754 if (qemu_in_vcpu_thread()) {
1755 qemu_system_vmstop_request_prepare();
1756 qemu_system_vmstop_request(state
);
1758 * FIXME: should not return to device code in case
1759 * vm_stop() has been requested.
1765 return do_vm_stop(state
);
1769 * Prepare for (re)starting the VM.
1770 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1771 * running or in case of an error condition), 0 otherwise.
1773 int vm_prepare_start(void)
1778 qemu_vmstop_requested(&requested
);
1779 if (runstate_is_running() && requested
== RUN_STATE__MAX
) {
1783 /* Ensure that a STOP/RESUME pair of events is emitted if a
1784 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1785 * example, according to documentation is always followed by
1788 if (runstate_is_running()) {
1789 qapi_event_send_stop(&error_abort
);
1792 replay_enable_events();
1794 runstate_set(RUN_STATE_RUNNING
);
1795 vm_state_notify(1, RUN_STATE_RUNNING
);
1798 /* We are sending this now, but the CPUs will be resumed shortly later */
1799 qapi_event_send_resume(&error_abort
);
/*
 * Start (or restart) the VM: resume all vCPUs when vm_prepare_start()
 * returns 0.
 * NOTE(review): the signature line is missing from this copy of the file;
 * it is restored from the upstream source and should be confirmed.
 */
void vm_start(void)
{
    if (vm_prepare_start() == 0) {
        resume_all_vcpus();
    }
}
1810 /* does a state transition even if the VM is already stopped,
1811 current state is forgotten forever */
1812 int vm_stop_force_state(RunState state
)
1814 if (runstate_is_running()) {
1815 return vm_stop(state
);
1817 runstate_set(state
);
1820 /* Make sure to return an error if the flush in a previous vm_stop()
1822 return bdrv_flush_all();
1826 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1828 /* XXX: implement xxx_cpu_list for targets that still miss it */
1829 #if defined(cpu_list)
1830 cpu_list(f
, cpu_fprintf
);
1834 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1836 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1841 #if defined(TARGET_I386)
1842 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1843 CPUX86State
*env
= &x86_cpu
->env
;
1844 #elif defined(TARGET_PPC)
1845 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1846 CPUPPCState
*env
= &ppc_cpu
->env
;
1847 #elif defined(TARGET_SPARC)
1848 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1849 CPUSPARCState
*env
= &sparc_cpu
->env
;
1850 #elif defined(TARGET_MIPS)
1851 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1852 CPUMIPSState
*env
= &mips_cpu
->env
;
1853 #elif defined(TARGET_TRICORE)
1854 TriCoreCPU
*tricore_cpu
= TRICORE_CPU(cpu
);
1855 CPUTriCoreState
*env
= &tricore_cpu
->env
;
1858 cpu_synchronize_state(cpu
);
1860 info
= g_malloc0(sizeof(*info
));
1861 info
->value
= g_malloc0(sizeof(*info
->value
));
1862 info
->value
->CPU
= cpu
->cpu_index
;
1863 info
->value
->current
= (cpu
== first_cpu
);
1864 info
->value
->halted
= cpu
->halted
;
1865 info
->value
->qom_path
= object_get_canonical_path(OBJECT(cpu
));
1866 info
->value
->thread_id
= cpu
->thread_id
;
1867 #if defined(TARGET_I386)
1868 info
->value
->arch
= CPU_INFO_ARCH_X86
;
1869 info
->value
->u
.x86
.pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1870 #elif defined(TARGET_PPC)
1871 info
->value
->arch
= CPU_INFO_ARCH_PPC
;
1872 info
->value
->u
.ppc
.nip
= env
->nip
;
1873 #elif defined(TARGET_SPARC)
1874 info
->value
->arch
= CPU_INFO_ARCH_SPARC
;
1875 info
->value
->u
.q_sparc
.pc
= env
->pc
;
1876 info
->value
->u
.q_sparc
.npc
= env
->npc
;
1877 #elif defined(TARGET_MIPS)
1878 info
->value
->arch
= CPU_INFO_ARCH_MIPS
;
1879 info
->value
->u
.q_mips
.PC
= env
->active_tc
.PC
;
1880 #elif defined(TARGET_TRICORE)
1881 info
->value
->arch
= CPU_INFO_ARCH_TRICORE
;
1882 info
->value
->u
.tricore
.PC
= env
->PC
;
1884 info
->value
->arch
= CPU_INFO_ARCH_OTHER
;
1887 /* XXX: waiting for the qapi to support GSList */
1889 head
= cur_item
= info
;
1891 cur_item
->next
= info
;
1899 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1900 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1906 int64_t orig_addr
= addr
, orig_size
= size
;
1912 cpu
= qemu_get_cpu(cpu_index
);
1914 error_setg(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1919 f
= fopen(filename
, "wb");
1921 error_setg_file_open(errp
, errno
, filename
);
1929 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1930 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"/size %" PRId64
1931 " specified", orig_addr
, orig_size
);
1934 if (fwrite(buf
, 1, l
, f
) != l
) {
1935 error_setg(errp
, QERR_IO_ERROR
);
1946 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1953 f
= fopen(filename
, "wb");
1955 error_setg_file_open(errp
, errno
, filename
);
1963 cpu_physical_memory_read(addr
, buf
, l
);
1964 if (fwrite(buf
, 1, l
, f
) != l
) {
1965 error_setg(errp
, QERR_IO_ERROR
);
1976 void qmp_inject_nmi(Error
**errp
)
1978 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
1981 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
1987 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
1988 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
1989 if (icount_align_option
) {
1990 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
1991 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
1993 cpu_fprintf(f
, "Max guest delay NA\n");
1994 cpu_fprintf(f
, "Max guest advance NA\n");