4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "qmp-commands.h"
41 #include "exec/exec-all.h"
43 #include "qemu/thread.h"
44 #include "sysemu/cpus.h"
45 #include "sysemu/qtest.h"
46 #include "qemu/main-loop.h"
47 #include "qemu/bitmap.h"
48 #include "qemu/seqlock.h"
50 #include "qapi-event.h"
52 #include "sysemu/replay.h"
56 #include <sys/prctl.h>
59 #define PR_MCE_KILL 33
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
70 #endif /* CONFIG_LINUX */
75 /* vcpu throttling controls */
76 static QEMUTimer
*throttle_timer
;
77 static unsigned int throttle_percentage
;
79 #define CPU_THROTTLE_PCT_MIN 1
80 #define CPU_THROTTLE_PCT_MAX 99
81 #define CPU_THROTTLE_TIMESLICE_NS 10000000
83 bool cpu_is_stopped(CPUState
*cpu
)
85 return cpu
->stopped
|| !runstate_is_running();
88 static bool cpu_thread_is_idle(CPUState
*cpu
)
90 if (cpu
->stop
|| cpu
->queued_work_first
) {
93 if (cpu_is_stopped(cpu
)) {
96 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
97 kvm_halt_in_kernel()) {
103 static bool all_cpu_threads_idle(void)
108 if (!cpu_thread_is_idle(cpu
)) {
115 /***********************************************************/
116 /* guest cycle counter */
118 /* Protected by TimersState seqlock */
120 static bool icount_sleep
= true;
121 static int64_t vm_clock_warp_start
= -1;
122 /* Conversion factor from emulated instructions to virtual clock ticks. */
123 static int icount_time_shift
;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
125 #define MAX_ICOUNT_SHIFT 10
127 static QEMUTimer
*icount_rt_timer
;
128 static QEMUTimer
*icount_vm_timer
;
129 static QEMUTimer
*icount_warp_timer
;
131 typedef struct TimersState
{
132 /* Protected by BQL. */
133 int64_t cpu_ticks_prev
;
134 int64_t cpu_ticks_offset
;
136 /* cpu_clock_offset can be read out of BQL, so protect it with
139 QemuSeqLock vm_clock_seqlock
;
140 int64_t cpu_clock_offset
;
141 int32_t cpu_ticks_enabled
;
144 /* Compensate for varying guest execution speed. */
145 int64_t qemu_icount_bias
;
146 /* Only written by TCG thread */
150 static TimersState timers_state
;
154 * We default to false if we know other options have been enabled
155 * which are currently incompatible with MTTCG. Otherwise when each
156 * guest (target) has been updated to support:
157 * - atomic instructions
158 * - memory ordering primitives (barriers)
159 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
161 * Once a guest architecture has been converted to the new primitives
162 * there are two remaining limitations to check.
164 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
165 * - The host must have a stronger memory order than the guest
167 * It may be possible in future to support strong guests on weak hosts
168 * but that will require tagging all load/stores in a guest with their
169 * implicit memory order requirements which would likely slow things
/*
 * Check that every memory-ordering bit the guest requires by default is
 * also provided by the host TCG target.  If either default is not defined
 * at build time the answer is false.
 */
static bool check_tcg_memory_orders_compatible(void)
{
    bool compatible = false;

#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    compatible = (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#endif
    return compatible;
}
182 static bool default_mttcg_enabled(void)
184 QemuOpts
*icount_opts
= qemu_find_opts_singleton("icount");
185 const char *rr
= qemu_opt_get(icount_opts
, "rr");
187 if (rr
|| TCG_OVERSIZED_GUEST
) {
190 #ifdef TARGET_SUPPORTS_MTTCG
191 return check_tcg_memory_orders_compatible();
198 void qemu_tcg_configure(QemuOpts
*opts
, Error
**errp
)
200 const char *t
= qemu_opt_get(opts
, "thread");
202 if (strcmp(t
, "multi") == 0) {
203 if (TCG_OVERSIZED_GUEST
) {
204 error_setg(errp
, "No MTTCG when guest word size > hosts");
206 if (!check_tcg_memory_orders_compatible()) {
207 error_report("Guest expects a stronger memory ordering "
208 "than the host provides");
209 error_printf("This may cause strange/hard to debug errors");
211 mttcg_enabled
= true;
213 } else if (strcmp(t
, "single") == 0) {
214 mttcg_enabled
= false;
216 error_setg(errp
, "Invalid 'thread' setting %s", t
);
219 mttcg_enabled
= default_mttcg_enabled();
223 int64_t cpu_get_icount_raw(void)
226 CPUState
*cpu
= current_cpu
;
228 icount
= timers_state
.qemu_icount
;
230 if (!cpu
->can_do_io
) {
231 fprintf(stderr
, "Bad icount read\n");
234 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
239 /* Return the virtual CPU time, based on the instruction counter. */
240 static int64_t cpu_get_icount_locked(void)
242 int64_t icount
= cpu_get_icount_raw();
243 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
246 int64_t cpu_get_icount(void)
252 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
253 icount
= cpu_get_icount_locked();
254 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
259 int64_t cpu_icount_to_ns(int64_t icount
)
261 return icount
<< icount_time_shift
;
264 /* return the time elapsed in VM between vm_start and vm_stop. Unless
265 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
268 * Caller must hold the BQL
270 int64_t cpu_get_ticks(void)
275 return cpu_get_icount();
278 ticks
= timers_state
.cpu_ticks_offset
;
279 if (timers_state
.cpu_ticks_enabled
) {
280 ticks
+= cpu_get_host_ticks();
283 if (timers_state
.cpu_ticks_prev
> ticks
) {
284 /* Note: non increasing ticks may happen if the host uses
286 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
287 ticks
= timers_state
.cpu_ticks_prev
;
290 timers_state
.cpu_ticks_prev
= ticks
;
294 static int64_t cpu_get_clock_locked(void)
298 time
= timers_state
.cpu_clock_offset
;
299 if (timers_state
.cpu_ticks_enabled
) {
306 /* Return the monotonic time elapsed in VM, i.e.,
307 * the time between vm_start and vm_stop
309 int64_t cpu_get_clock(void)
315 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
316 ti
= cpu_get_clock_locked();
317 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
322 /* enable cpu_get_ticks()
323 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
325 void cpu_enable_ticks(void)
327 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
328 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
329 if (!timers_state
.cpu_ticks_enabled
) {
330 timers_state
.cpu_ticks_offset
-= cpu_get_host_ticks();
331 timers_state
.cpu_clock_offset
-= get_clock();
332 timers_state
.cpu_ticks_enabled
= 1;
334 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
337 /* disable cpu_get_ticks() : the clock is stopped. You must not call
338 * cpu_get_ticks() after that.
339 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
341 void cpu_disable_ticks(void)
343 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
344 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
345 if (timers_state
.cpu_ticks_enabled
) {
346 timers_state
.cpu_ticks_offset
+= cpu_get_host_ticks();
347 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
348 timers_state
.cpu_ticks_enabled
= 0;
350 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
353 /* Correlation between real and virtual time is always going to be
354 fairly approximate, so ignore small variation.
355 When the guest is idle real and virtual time will be aligned in
357 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
359 static void icount_adjust(void)
365 /* Protected by TimersState mutex. */
366 static int64_t last_delta
;
368 /* If the VM is not running, then do nothing. */
369 if (!runstate_is_running()) {
373 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
374 cur_time
= cpu_get_clock_locked();
375 cur_icount
= cpu_get_icount_locked();
377 delta
= cur_icount
- cur_time
;
378 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
380 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
381 && icount_time_shift
> 0) {
382 /* The guest is getting too far ahead. Slow time down. */
386 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
387 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
388 /* The guest is getting too far behind. Speed time up. */
392 timers_state
.qemu_icount_bias
= cur_icount
393 - (timers_state
.qemu_icount
<< icount_time_shift
);
394 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
397 static void icount_adjust_rt(void *opaque
)
399 timer_mod(icount_rt_timer
,
400 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
404 static void icount_adjust_vm(void *opaque
)
406 timer_mod(icount_vm_timer
,
407 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
408 NANOSECONDS_PER_SECOND
/ 10);
412 static int64_t qemu_icount_round(int64_t count
)
414 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
417 static void icount_warp_rt(void)
422 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
423 * changes from -1 to another value, so the race here is okay.
426 seq
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
427 warp_start
= vm_clock_warp_start
;
428 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, seq
));
430 if (warp_start
== -1) {
434 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
435 if (runstate_is_running()) {
436 int64_t clock
= REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT
,
437 cpu_get_clock_locked());
440 warp_delta
= clock
- vm_clock_warp_start
;
441 if (use_icount
== 2) {
443 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
444 * far ahead of real time.
446 int64_t cur_icount
= cpu_get_icount_locked();
447 int64_t delta
= clock
- cur_icount
;
448 warp_delta
= MIN(warp_delta
, delta
);
450 timers_state
.qemu_icount_bias
+= warp_delta
;
452 vm_clock_warp_start
= -1;
453 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
455 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
456 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
/*
 * Timer callback that applies the pending clock warp.  @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
468 void qtest_clock_warp(int64_t dest
)
470 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
471 AioContext
*aio_context
;
472 assert(qtest_enabled());
473 aio_context
= qemu_get_aio_context();
474 while (clock
< dest
) {
475 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
476 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
478 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
479 timers_state
.qemu_icount_bias
+= warp
;
480 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
482 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
483 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
484 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
486 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
489 void qemu_start_warp_timer(void)
498 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
499 * do not fire, so computing the deadline does not make sense.
501 if (!runstate_is_running()) {
505 /* warp clock deterministically in record/replay mode */
506 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START
)) {
510 if (!all_cpu_threads_idle()) {
514 if (qtest_enabled()) {
515 /* When testing, qtest commands advance icount. */
519 /* We want to use the earliest deadline from ALL vm_clocks */
520 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
521 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
523 static bool notified
;
524 if (!icount_sleep
&& !notified
) {
525 error_report("WARNING: icount sleep disabled and no active timers");
533 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
534 * sleep. Otherwise, the CPU might be waiting for a future timer
535 * interrupt to wake it up, but the interrupt never comes because
536 * the vCPU isn't running any insns and thus doesn't advance the
537 * QEMU_CLOCK_VIRTUAL.
541 * We never let VCPUs sleep in no sleep icount mode.
542 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
543 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
544 * It is useful when we want a deterministic execution time,
545 * isolated from host latencies.
547 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
548 timers_state
.qemu_icount_bias
+= deadline
;
549 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
550 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
553 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
554 * "real" time, (related to the time left until the next event) has
555 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
556 * This avoids that the warps are visible externally; for example,
557 * you will not be sending network packets continuously instead of
560 seqlock_write_begin(&timers_state
.vm_clock_seqlock
);
561 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
562 vm_clock_warp_start
= clock
;
564 seqlock_write_end(&timers_state
.vm_clock_seqlock
);
565 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
567 } else if (deadline
== 0) {
568 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
572 static void qemu_account_warp_timer(void)
574 if (!use_icount
|| !icount_sleep
) {
578 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
579 * do not fire, so computing the deadline does not make sense.
581 if (!runstate_is_running()) {
585 /* warp clock deterministically in record/replay mode */
586 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT
)) {
590 timer_del(icount_warp_timer
);
594 static bool icount_state_needed(void *opaque
)
600 * This is a subsection for icount migration.
602 static const VMStateDescription icount_vmstate_timers
= {
603 .name
= "timer/icount",
605 .minimum_version_id
= 1,
606 .needed
= icount_state_needed
,
607 .fields
= (VMStateField
[]) {
608 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
609 VMSTATE_INT64(qemu_icount
, TimersState
),
610 VMSTATE_END_OF_LIST()
614 static const VMStateDescription vmstate_timers
= {
617 .minimum_version_id
= 1,
618 .fields
= (VMStateField
[]) {
619 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
620 VMSTATE_INT64(dummy
, TimersState
),
621 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
622 VMSTATE_END_OF_LIST()
624 .subsections
= (const VMStateDescription
*[]) {
625 &icount_vmstate_timers
,
630 static void cpu_throttle_thread(CPUState
*cpu
, run_on_cpu_data opaque
)
633 double throttle_ratio
;
636 if (!cpu_throttle_get_percentage()) {
640 pct
= (double)cpu_throttle_get_percentage()/100;
641 throttle_ratio
= pct
/ (1 - pct
);
642 sleeptime_ns
= (long)(throttle_ratio
* CPU_THROTTLE_TIMESLICE_NS
);
644 qemu_mutex_unlock_iothread();
645 atomic_set(&cpu
->throttle_thread_scheduled
, 0);
646 g_usleep(sleeptime_ns
/ 1000); /* Convert ns to us for usleep call */
647 qemu_mutex_lock_iothread();
650 static void cpu_throttle_timer_tick(void *opaque
)
655 /* Stop the timer if needed */
656 if (!cpu_throttle_get_percentage()) {
660 if (!atomic_xchg(&cpu
->throttle_thread_scheduled
, 1)) {
661 async_run_on_cpu(cpu
, cpu_throttle_thread
,
666 pct
= (double)cpu_throttle_get_percentage()/100;
667 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
668 CPU_THROTTLE_TIMESLICE_NS
/ (1-pct
));
671 void cpu_throttle_set(int new_throttle_pct
)
673 /* Ensure throttle percentage is within valid range */
674 new_throttle_pct
= MIN(new_throttle_pct
, CPU_THROTTLE_PCT_MAX
);
675 new_throttle_pct
= MAX(new_throttle_pct
, CPU_THROTTLE_PCT_MIN
);
677 atomic_set(&throttle_percentage
, new_throttle_pct
);
679 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
680 CPU_THROTTLE_TIMESLICE_NS
);
683 void cpu_throttle_stop(void)
685 atomic_set(&throttle_percentage
, 0);
/* Throttling is active whenever the current percentage is non-zero. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
693 int cpu_throttle_get_percentage(void)
695 return atomic_read(&throttle_percentage
);
698 void cpu_ticks_init(void)
700 seqlock_init(&timers_state
.vm_clock_seqlock
);
701 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
702 throttle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
703 cpu_throttle_timer_tick
, NULL
);
706 void configure_icount(QemuOpts
*opts
, Error
**errp
)
709 char *rem_str
= NULL
;
711 option
= qemu_opt_get(opts
, "shift");
713 if (qemu_opt_get(opts
, "align") != NULL
) {
714 error_setg(errp
, "Please specify shift option when using align");
719 icount_sleep
= qemu_opt_get_bool(opts
, "sleep", true);
721 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
722 icount_timer_cb
, NULL
);
725 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
727 if (icount_align_option
&& !icount_sleep
) {
728 error_setg(errp
, "align=on and sleep=off are incompatible");
730 if (strcmp(option
, "auto") != 0) {
732 icount_time_shift
= strtol(option
, &rem_str
, 0);
733 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
734 error_setg(errp
, "icount: Invalid shift value");
738 } else if (icount_align_option
) {
739 error_setg(errp
, "shift=auto and align=on are incompatible");
740 } else if (!icount_sleep
) {
741 error_setg(errp
, "shift=auto and sleep=off are incompatible");
746 /* 125MIPS seems a reasonable initial guess at the guest speed.
747 It will be corrected fairly quickly anyway. */
748 icount_time_shift
= 3;
750 /* Have both realtime and virtual time triggers for speed adjustment.
751 The realtime trigger catches emulated time passing too slowly,
752 the virtual time trigger catches emulated time passing too fast.
753 Realtime triggers occur even when idle, so use them less frequently
755 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
756 icount_adjust_rt
, NULL
);
757 timer_mod(icount_rt_timer
,
758 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
759 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
760 icount_adjust_vm
, NULL
);
761 timer_mod(icount_vm_timer
,
762 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
763 NANOSECONDS_PER_SECOND
/ 10);
766 /***********************************************************/
767 /* TCG vCPU kick timer
769 * The kick timer is responsible for moving single threaded vCPU
770 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
774 * The timer is removed if all vCPUs are idle and restarted again once
775 * idleness is complete.
778 static QEMUTimer
*tcg_kick_vcpu_timer
;
779 static CPUState
*tcg_current_rr_cpu
;
781 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
783 static inline int64_t qemu_tcg_next_kick(void)
785 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) + TCG_KICK_PERIOD
;
788 /* Kick the currently round-robin scheduled vCPU */
789 static void qemu_cpu_kick_rr_cpu(void)
793 cpu
= atomic_mb_read(&tcg_current_rr_cpu
);
797 } while (cpu
!= atomic_mb_read(&tcg_current_rr_cpu
));
800 static void kick_tcg_thread(void *opaque
)
802 timer_mod(tcg_kick_vcpu_timer
, qemu_tcg_next_kick());
803 qemu_cpu_kick_rr_cpu();
806 static void start_tcg_kick_timer(void)
808 if (!mttcg_enabled
&& !tcg_kick_vcpu_timer
&& CPU_NEXT(first_cpu
)) {
809 tcg_kick_vcpu_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
810 kick_tcg_thread
, NULL
);
811 timer_mod(tcg_kick_vcpu_timer
, qemu_tcg_next_kick());
815 static void stop_tcg_kick_timer(void)
817 if (tcg_kick_vcpu_timer
) {
818 timer_del(tcg_kick_vcpu_timer
);
819 tcg_kick_vcpu_timer
= NULL
;
823 /***********************************************************/
824 void hw_error(const char *fmt
, ...)
830 fprintf(stderr
, "qemu: hardware error: ");
831 vfprintf(stderr
, fmt
, ap
);
832 fprintf(stderr
, "\n");
834 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
835 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
841 void cpu_synchronize_all_states(void)
846 cpu_synchronize_state(cpu
);
850 void cpu_synchronize_all_post_reset(void)
855 cpu_synchronize_post_reset(cpu
);
859 void cpu_synchronize_all_post_init(void)
864 cpu_synchronize_post_init(cpu
);
868 static int do_vm_stop(RunState state
)
872 if (runstate_is_running()) {
876 vm_state_notify(0, state
);
877 qapi_event_send_stop(&error_abort
);
881 replay_disable_events();
882 ret
= bdrv_flush_all();
887 static bool cpu_can_run(CPUState
*cpu
)
892 if (cpu_is_stopped(cpu
)) {
898 static void cpu_handle_guest_debug(CPUState
*cpu
)
900 gdb_set_stop_cpu(cpu
);
901 qemu_system_debug_request();
906 static void sigbus_reraise(void)
909 struct sigaction action
;
911 memset(&action
, 0, sizeof(action
));
912 action
.sa_handler
= SIG_DFL
;
913 if (!sigaction(SIGBUS
, &action
, NULL
)) {
916 sigaddset(&set
, SIGBUS
);
917 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
919 perror("Failed to re-raise SIGBUS!\n");
923 static void sigbus_handler(int n
, siginfo_t
*siginfo
, void *ctx
)
925 if (siginfo
->si_code
!= BUS_MCEERR_AO
&& siginfo
->si_code
!= BUS_MCEERR_AR
) {
930 /* Called asynchronously in VCPU thread. */
931 if (kvm_on_sigbus_vcpu(current_cpu
, siginfo
->si_code
, siginfo
->si_addr
)) {
935 /* Called synchronously (via signalfd) in main thread. */
936 if (kvm_on_sigbus(siginfo
->si_code
, siginfo
->si_addr
)) {
942 static void qemu_init_sigbus(void)
944 struct sigaction action
;
946 memset(&action
, 0, sizeof(action
));
947 action
.sa_flags
= SA_SIGINFO
;
948 action
.sa_sigaction
= sigbus_handler
;
949 sigaction(SIGBUS
, &action
, NULL
);
951 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
954 static void dummy_signal(int sig
)
958 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
962 struct sigaction sigact
;
964 memset(&sigact
, 0, sizeof(sigact
));
965 sigact
.sa_handler
= dummy_signal
;
966 sigaction(SIG_IPI
, &sigact
, NULL
);
968 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
969 sigdelset(&set
, SIGBUS
);
970 pthread_sigmask(SIG_SETMASK
, &set
, NULL
);
971 sigdelset(&set
, SIG_IPI
);
972 r
= kvm_set_signal_mask(cpu
, &set
);
974 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
979 static void qemu_kvm_eat_signals(CPUState
*cpu
)
981 struct timespec ts
= { 0, 0 };
987 sigemptyset(&waitset
);
988 sigaddset(&waitset
, SIG_IPI
);
991 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
992 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
993 perror("sigtimedwait");
997 r
= sigpending(&chkset
);
999 perror("sigpending");
1002 } while (sigismember(&chkset
, SIG_IPI
));
1004 #else /* !CONFIG_LINUX */
1005 static void qemu_init_sigbus(void)
1009 static void qemu_kvm_eat_signals(CPUState
*cpu
)
1013 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
1016 #endif /* !CONFIG_LINUX */
1018 static QemuMutex qemu_global_mutex
;
1020 static QemuThread io_thread
;
1023 static QemuCond qemu_cpu_cond
;
1025 static QemuCond qemu_pause_cond
;
1027 void qemu_init_cpu_loop(void)
1030 qemu_cond_init(&qemu_cpu_cond
);
1031 qemu_cond_init(&qemu_pause_cond
);
1032 qemu_mutex_init(&qemu_global_mutex
);
1034 qemu_thread_get_self(&io_thread
);
1037 void run_on_cpu(CPUState
*cpu
, run_on_cpu_func func
, run_on_cpu_data data
)
1039 do_run_on_cpu(cpu
, func
, data
, &qemu_global_mutex
);
1042 static void qemu_kvm_destroy_vcpu(CPUState
*cpu
)
1044 if (kvm_destroy_vcpu(cpu
) < 0) {
1045 error_report("kvm_destroy_vcpu failed");
1050 static void qemu_tcg_destroy_vcpu(CPUState
*cpu
)
1054 static void qemu_wait_io_event_common(CPUState
*cpu
)
1056 atomic_mb_set(&cpu
->thread_kicked
, false);
1059 cpu
->stopped
= true;
1060 qemu_cond_broadcast(&qemu_pause_cond
);
1062 process_queued_cpu_work(cpu
);
1065 static bool qemu_tcg_should_sleep(CPUState
*cpu
)
1067 if (mttcg_enabled
) {
1068 return cpu_thread_is_idle(cpu
);
1070 return all_cpu_threads_idle();
1074 static void qemu_tcg_wait_io_event(CPUState
*cpu
)
1076 while (qemu_tcg_should_sleep(cpu
)) {
1077 stop_tcg_kick_timer();
1078 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1081 start_tcg_kick_timer();
1083 qemu_wait_io_event_common(cpu
);
1086 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
1088 while (cpu_thread_is_idle(cpu
)) {
1089 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1092 qemu_kvm_eat_signals(cpu
);
1093 qemu_wait_io_event_common(cpu
);
1096 static void *qemu_kvm_cpu_thread_fn(void *arg
)
1098 CPUState
*cpu
= arg
;
1101 rcu_register_thread();
1103 qemu_mutex_lock_iothread();
1104 qemu_thread_get_self(cpu
->thread
);
1105 cpu
->thread_id
= qemu_get_thread_id();
1109 r
= kvm_init_vcpu(cpu
);
1111 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
1115 qemu_kvm_init_cpu_signals(cpu
);
1117 /* signal CPU creation */
1118 cpu
->created
= true;
1119 qemu_cond_signal(&qemu_cpu_cond
);
1122 if (cpu_can_run(cpu
)) {
1123 r
= kvm_cpu_exec(cpu
);
1124 if (r
== EXCP_DEBUG
) {
1125 cpu_handle_guest_debug(cpu
);
1128 qemu_kvm_wait_io_event(cpu
);
1129 } while (!cpu
->unplug
|| cpu_can_run(cpu
));
1131 qemu_kvm_destroy_vcpu(cpu
);
1132 cpu
->created
= false;
1133 qemu_cond_signal(&qemu_cpu_cond
);
1134 qemu_mutex_unlock_iothread();
1138 static void *qemu_dummy_cpu_thread_fn(void *arg
)
1141 fprintf(stderr
, "qtest is not supported under Windows\n");
1144 CPUState
*cpu
= arg
;
1148 rcu_register_thread();
1150 qemu_mutex_lock_iothread();
1151 qemu_thread_get_self(cpu
->thread
);
1152 cpu
->thread_id
= qemu_get_thread_id();
1156 sigemptyset(&waitset
);
1157 sigaddset(&waitset
, SIG_IPI
);
1159 /* signal CPU creation */
1160 cpu
->created
= true;
1161 qemu_cond_signal(&qemu_cpu_cond
);
1164 qemu_mutex_unlock_iothread();
1167 r
= sigwait(&waitset
, &sig
);
1168 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1173 qemu_mutex_lock_iothread();
1174 qemu_wait_io_event_common(cpu
);
1181 static int64_t tcg_get_icount_limit(void)
1185 if (replay_mode
!= REPLAY_MODE_PLAY
) {
1186 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1188 /* Maintain prior (possibly buggy) behaviour where if no deadline
1189 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1190 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1193 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1194 deadline
= INT32_MAX
;
1197 return qemu_icount_round(deadline
);
1199 return replay_get_instructions();
1203 static void handle_icount_deadline(void)
1207 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1209 if (deadline
== 0) {
1210 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1215 static int tcg_cpu_exec(CPUState
*cpu
)
1218 #ifdef CONFIG_PROFILER
1222 #ifdef CONFIG_PROFILER
1223 ti
= profile_getclock();
1228 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1229 + cpu
->icount_extra
);
1230 cpu
->icount_decr
.u16
.low
= 0;
1231 cpu
->icount_extra
= 0;
1232 count
= tcg_get_icount_limit();
1233 timers_state
.qemu_icount
+= count
;
1234 decr
= (count
> 0xffff) ? 0xffff : count
;
1236 cpu
->icount_decr
.u16
.low
= decr
;
1237 cpu
->icount_extra
= count
;
1239 qemu_mutex_unlock_iothread();
1240 cpu_exec_start(cpu
);
1241 ret
= cpu_exec(cpu
);
1243 qemu_mutex_lock_iothread();
1244 #ifdef CONFIG_PROFILER
1245 tcg_time
+= profile_getclock() - ti
;
1248 /* Fold pending instructions back into the
1249 instruction counter, and clear the interrupt flag. */
1250 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1251 + cpu
->icount_extra
);
1252 cpu
->icount_decr
.u32
= 0;
1253 cpu
->icount_extra
= 0;
1254 replay_account_executed_instructions();
1259 /* Destroy any remaining vCPUs which have been unplugged and have
1262 static void deal_with_unplugged_cpus(void)
1267 if (cpu
->unplug
&& !cpu_can_run(cpu
)) {
1268 qemu_tcg_destroy_vcpu(cpu
);
1269 cpu
->created
= false;
1270 qemu_cond_signal(&qemu_cpu_cond
);
1276 /* Single-threaded TCG
1278 * In the single-threaded case each vCPU is simulated in turn. If
1279 * there is more than a single vCPU we create a simple timer to kick
1280 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1281 * This is done explicitly rather than relying on side-effects
1285 static void *qemu_tcg_rr_cpu_thread_fn(void *arg
)
1287 CPUState
*cpu
= arg
;
1289 rcu_register_thread();
1291 qemu_mutex_lock_iothread();
1292 qemu_thread_get_self(cpu
->thread
);
1295 cpu
->thread_id
= qemu_get_thread_id();
1296 cpu
->created
= true;
1299 qemu_cond_signal(&qemu_cpu_cond
);
1301 /* wait for initial kick-off after machine start */
1302 while (first_cpu
->stopped
) {
1303 qemu_cond_wait(first_cpu
->halt_cond
, &qemu_global_mutex
);
1305 /* process any pending work */
1308 qemu_wait_io_event_common(cpu
);
1312 start_tcg_kick_timer();
1316 /* process any pending work */
1317 cpu
->exit_request
= 1;
1320 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1321 qemu_account_warp_timer();
1327 while (cpu
&& !cpu
->queued_work_first
&& !cpu
->exit_request
) {
1329 atomic_mb_set(&tcg_current_rr_cpu
, cpu
);
1332 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1333 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1335 if (cpu_can_run(cpu
)) {
1337 r
= tcg_cpu_exec(cpu
);
1338 if (r
== EXCP_DEBUG
) {
1339 cpu_handle_guest_debug(cpu
);
1341 } else if (r
== EXCP_ATOMIC
) {
1342 qemu_mutex_unlock_iothread();
1343 cpu_exec_step_atomic(cpu
);
1344 qemu_mutex_lock_iothread();
1347 } else if (cpu
->stop
) {
1349 cpu
= CPU_NEXT(cpu
);
1354 cpu
= CPU_NEXT(cpu
);
1355 } /* while (cpu && !cpu->exit_request).. */
1357 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1358 atomic_set(&tcg_current_rr_cpu
, NULL
);
1360 if (cpu
&& cpu
->exit_request
) {
1361 atomic_mb_set(&cpu
->exit_request
, 0);
1364 handle_icount_deadline();
1366 qemu_tcg_wait_io_event(cpu
? cpu
: QTAILQ_FIRST(&cpus
));
1367 deal_with_unplugged_cpus();
1373 static void *qemu_hax_cpu_thread_fn(void *arg
)
1375 CPUState
*cpu
= arg
;
1377 qemu_thread_get_self(cpu
->thread
);
1378 qemu_mutex_lock(&qemu_global_mutex
);
1380 cpu
->thread_id
= qemu_get_thread_id();
1381 cpu
->created
= true;
1386 qemu_cond_signal(&qemu_cpu_cond
);
1389 if (cpu_can_run(cpu
)) {
1390 r
= hax_smp_cpu_exec(cpu
);
1391 if (r
== EXCP_DEBUG
) {
1392 cpu_handle_guest_debug(cpu
);
1396 while (cpu_thread_is_idle(cpu
)) {
1397 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1402 qemu_wait_io_event_common(cpu
);
1408 static void CALLBACK
dummy_apc_func(ULONG_PTR unused
)
1413 /* Multi-threaded TCG
1415 * In the multi-threaded case each vCPU has its own thread. The TLS
1416 * variable current_cpu can be used deep in the code to find the
1417 * current CPUState for a given thread.
1420 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1422 CPUState
*cpu
= arg
;
1424 rcu_register_thread();
1426 qemu_mutex_lock_iothread();
1427 qemu_thread_get_self(cpu
->thread
);
1429 cpu
->thread_id
= qemu_get_thread_id();
1430 cpu
->created
= true;
1433 qemu_cond_signal(&qemu_cpu_cond
);
1435 /* process any pending work */
1436 cpu
->exit_request
= 1;
1439 if (cpu_can_run(cpu
)) {
1441 r
= tcg_cpu_exec(cpu
);
1444 cpu_handle_guest_debug(cpu
);
1447 /* during start-up the vCPU is reset and the thread is
1448 * kicked several times. If we don't ensure we go back
1449 * to sleep in the halted state we won't cleanly
1450 * start-up when the vCPU is enabled.
1452 * cpu->halted should ensure we sleep in wait_io_event
1454 g_assert(cpu
->halted
);
1457 qemu_mutex_unlock_iothread();
1458 cpu_exec_step_atomic(cpu
);
1459 qemu_mutex_lock_iothread();
1461 /* Ignore everything else? */
1466 handle_icount_deadline();
1468 atomic_mb_set(&cpu
->exit_request
, 0);
1469 qemu_tcg_wait_io_event(cpu
);
1475 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1480 if (cpu
->thread_kicked
) {
1483 cpu
->thread_kicked
= true;
1484 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1486 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1490 if (!qemu_cpu_is_self(cpu
)) {
1491 if (!QueueUserAPC(dummy_apc_func
, cpu
->hThread
, 0)) {
1492 fprintf(stderr
, "%s: QueueUserAPC failed with error %lu\n",
1493 __func__
, GetLastError());
1500 void qemu_cpu_kick(CPUState
*cpu
)
1502 qemu_cond_broadcast(cpu
->halt_cond
);
1503 if (tcg_enabled()) {
1505 /* NOP unless doing single-thread RR */
1506 qemu_cpu_kick_rr_cpu();
1508 if (hax_enabled()) {
1510 * FIXME: race condition with the exit_request check in
1513 cpu
->exit_request
= 1;
1515 qemu_cpu_kick_thread(cpu
);
1519 void qemu_cpu_kick_self(void)
1521 assert(current_cpu
);
1522 qemu_cpu_kick_thread(current_cpu
);
1525 bool qemu_cpu_is_self(CPUState
*cpu
)
1527 return qemu_thread_is_self(cpu
->thread
);
1530 bool qemu_in_vcpu_thread(void)
1532 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
/*
 * Per-thread flag recording whether the calling thread took the global
 * mutex through qemu_mutex_lock_iothread(). Static storage, so it starts
 * out false in every thread.
 */
static __thread bool iothread_locked;

/* Return the calling thread's iothread_locked flag. */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1542 void qemu_mutex_lock_iothread(void)
1544 g_assert(!qemu_mutex_iothread_locked());
1545 qemu_mutex_lock(&qemu_global_mutex
);
1546 iothread_locked
= true;
1549 void qemu_mutex_unlock_iothread(void)
1551 g_assert(qemu_mutex_iothread_locked());
1552 iothread_locked
= false;
1553 qemu_mutex_unlock(&qemu_global_mutex
);
1556 static bool all_vcpus_paused(void)
1561 if (!cpu
->stopped
) {
1569 void pause_all_vcpus(void)
1573 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1579 if (qemu_in_vcpu_thread()) {
1583 while (!all_vcpus_paused()) {
1584 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1591 void cpu_resume(CPUState
*cpu
)
1594 cpu
->stopped
= false;
1598 void resume_all_vcpus(void)
1602 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1608 void cpu_remove(CPUState
*cpu
)
1615 void cpu_remove_sync(CPUState
*cpu
)
1618 while (cpu
->created
) {
1619 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
/* Size, in bytes, of the temporary buffers used to form vCPU thread names */
#define VCPU_THREAD_NAME_SIZE 16
1626 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1628 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1629 static QemuCond
*single_tcg_halt_cond
;
1630 static QemuThread
*single_tcg_cpu_thread
;
1632 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread
) {
1633 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1634 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1635 qemu_cond_init(cpu
->halt_cond
);
1637 if (qemu_tcg_mttcg_enabled()) {
1638 /* create a thread per vCPU with TCG (MTTCG) */
1639 parallel_cpus
= true;
1640 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1643 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1644 cpu
, QEMU_THREAD_JOINABLE
);
1647 /* share a single thread for all cpus with TCG */
1648 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "ALL CPUs/TCG");
1649 qemu_thread_create(cpu
->thread
, thread_name
,
1650 qemu_tcg_rr_cpu_thread_fn
,
1651 cpu
, QEMU_THREAD_JOINABLE
);
1653 single_tcg_halt_cond
= cpu
->halt_cond
;
1654 single_tcg_cpu_thread
= cpu
->thread
;
1657 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1659 while (!cpu
->created
) {
1660 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1663 /* For non-MTTCG cases we share the thread */
1664 cpu
->thread
= single_tcg_cpu_thread
;
1665 cpu
->halt_cond
= single_tcg_halt_cond
;
1669 static void qemu_hax_start_vcpu(CPUState
*cpu
)
1671 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1673 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1674 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1675 qemu_cond_init(cpu
->halt_cond
);
1677 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/HAX",
1679 qemu_thread_create(cpu
->thread
, thread_name
, qemu_hax_cpu_thread_fn
,
1680 cpu
, QEMU_THREAD_JOINABLE
);
1682 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1684 while (!cpu
->created
) {
1685 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1689 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1691 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1693 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1694 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1695 qemu_cond_init(cpu
->halt_cond
);
1696 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1698 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1699 cpu
, QEMU_THREAD_JOINABLE
);
1700 while (!cpu
->created
) {
1701 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1705 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1707 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1709 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1710 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1711 qemu_cond_init(cpu
->halt_cond
);
1712 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1714 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1715 QEMU_THREAD_JOINABLE
);
1716 while (!cpu
->created
) {
1717 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1721 void qemu_init_vcpu(CPUState
*cpu
)
1723 cpu
->nr_cores
= smp_cores
;
1724 cpu
->nr_threads
= smp_threads
;
1725 cpu
->stopped
= true;
1728 /* If the target cpu hasn't set up any address spaces itself,
1729 * give it the default one.
1731 AddressSpace
*as
= address_space_init_shareable(cpu
->memory
,
1734 cpu_address_space_init(cpu
, as
, 0);
1737 if (kvm_enabled()) {
1738 qemu_kvm_start_vcpu(cpu
);
1739 } else if (hax_enabled()) {
1740 qemu_hax_start_vcpu(cpu
);
1741 } else if (tcg_enabled()) {
1742 qemu_tcg_init_vcpu(cpu
);
1744 qemu_dummy_start_vcpu(cpu
);
1748 void cpu_stop_current(void)
1751 current_cpu
->stop
= false;
1752 current_cpu
->stopped
= true;
1753 cpu_exit(current_cpu
);
1754 qemu_cond_broadcast(&qemu_pause_cond
);
1758 int vm_stop(RunState state
)
1760 if (qemu_in_vcpu_thread()) {
1761 qemu_system_vmstop_request_prepare();
1762 qemu_system_vmstop_request(state
);
1764 * FIXME: should not return to device code in case
1765 * vm_stop() has been requested.
1771 return do_vm_stop(state
);
1775 * Prepare for (re)starting the VM.
1776 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1777 * running or in case of an error condition), 0 otherwise.
1779 int vm_prepare_start(void)
1784 qemu_vmstop_requested(&requested
);
1785 if (runstate_is_running() && requested
== RUN_STATE__MAX
) {
1789 /* Ensure that a STOP/RESUME pair of events is emitted if a
1790 * vmstop request was pending. The BLOCK_IO_ERROR event, for
1791 * example, according to documentation is always followed by
1794 if (runstate_is_running()) {
1795 qapi_event_send_stop(&error_abort
);
1798 replay_enable_events();
1800 runstate_set(RUN_STATE_RUNNING
);
1801 vm_state_notify(1, RUN_STATE_RUNNING
);
1804 /* We are sending this now, but the CPUs will be resumed shortly later */
1805 qapi_event_send_resume(&error_abort
);
1811 if (!vm_prepare_start()) {
1816 /* does a state transition even if the VM is already stopped,
1817 current state is forgotten forever */
1818 int vm_stop_force_state(RunState state
)
1820 if (runstate_is_running()) {
1821 return vm_stop(state
);
1823 runstate_set(state
);
1826 /* Make sure to return an error if the flush in a previous vm_stop()
1828 return bdrv_flush_all();
1832 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1834 /* XXX: implement xxx_cpu_list for targets that still miss it */
1835 #if defined(cpu_list)
1836 cpu_list(f
, cpu_fprintf
);
1840 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1842 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1847 #if defined(TARGET_I386)
1848 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1849 CPUX86State
*env
= &x86_cpu
->env
;
1850 #elif defined(TARGET_PPC)
1851 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1852 CPUPPCState
*env
= &ppc_cpu
->env
;
1853 #elif defined(TARGET_SPARC)
1854 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1855 CPUSPARCState
*env
= &sparc_cpu
->env
;
1856 #elif defined(TARGET_MIPS)
1857 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1858 CPUMIPSState
*env
= &mips_cpu
->env
;
1859 #elif defined(TARGET_TRICORE)
1860 TriCoreCPU
*tricore_cpu
= TRICORE_CPU(cpu
);
1861 CPUTriCoreState
*env
= &tricore_cpu
->env
;
1864 cpu_synchronize_state(cpu
);
1866 info
= g_malloc0(sizeof(*info
));
1867 info
->value
= g_malloc0(sizeof(*info
->value
));
1868 info
->value
->CPU
= cpu
->cpu_index
;
1869 info
->value
->current
= (cpu
== first_cpu
);
1870 info
->value
->halted
= cpu
->halted
;
1871 info
->value
->qom_path
= object_get_canonical_path(OBJECT(cpu
));
1872 info
->value
->thread_id
= cpu
->thread_id
;
1873 #if defined(TARGET_I386)
1874 info
->value
->arch
= CPU_INFO_ARCH_X86
;
1875 info
->value
->u
.x86
.pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1876 #elif defined(TARGET_PPC)
1877 info
->value
->arch
= CPU_INFO_ARCH_PPC
;
1878 info
->value
->u
.ppc
.nip
= env
->nip
;
1879 #elif defined(TARGET_SPARC)
1880 info
->value
->arch
= CPU_INFO_ARCH_SPARC
;
1881 info
->value
->u
.q_sparc
.pc
= env
->pc
;
1882 info
->value
->u
.q_sparc
.npc
= env
->npc
;
1883 #elif defined(TARGET_MIPS)
1884 info
->value
->arch
= CPU_INFO_ARCH_MIPS
;
1885 info
->value
->u
.q_mips
.PC
= env
->active_tc
.PC
;
1886 #elif defined(TARGET_TRICORE)
1887 info
->value
->arch
= CPU_INFO_ARCH_TRICORE
;
1888 info
->value
->u
.tricore
.PC
= env
->PC
;
1890 info
->value
->arch
= CPU_INFO_ARCH_OTHER
;
1893 /* XXX: waiting for the qapi to support GSList */
1895 head
= cur_item
= info
;
1897 cur_item
->next
= info
;
1905 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1906 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1912 int64_t orig_addr
= addr
, orig_size
= size
;
1918 cpu
= qemu_get_cpu(cpu_index
);
1920 error_setg(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1925 f
= fopen(filename
, "wb");
1927 error_setg_file_open(errp
, errno
, filename
);
1935 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1936 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"/size %" PRId64
1937 " specified", orig_addr
, orig_size
);
1940 if (fwrite(buf
, 1, l
, f
) != l
) {
1941 error_setg(errp
, QERR_IO_ERROR
);
1952 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1959 f
= fopen(filename
, "wb");
1961 error_setg_file_open(errp
, errno
, filename
);
1969 cpu_physical_memory_read(addr
, buf
, l
);
1970 if (fwrite(buf
, 1, l
, f
) != l
) {
1971 error_setg(errp
, QERR_IO_ERROR
);
1982 void qmp_inject_nmi(Error
**errp
)
1984 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
1987 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
1993 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
1994 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
1995 if (icount_align_option
) {
1996 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
1997 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
1999 cpu_fprintf(f
, "Max guest delay NA\n");
2000 cpu_fprintf(f
, "Max guest advance NA\n");