/*
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */
static int64_t vm_clock_warp_start;

/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10
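
/* Concretely: QEMU_CLOCK_VIRTUAL counts in nanoseconds and every executed
 * instruction advances it by 2^icount_time_shift ns, so at the maximum
 * shift of 10 one instruction accounts for 1024 ns -- roughly one
 * instruction per microsecond, i.e. about 1 MIPS.
 */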

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
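
/* The locking discipline implied above, in sketch form: writers hold the
 * BQL and bump the seqlock around every update, while lock-free readers
 * spin until they observe a stable sequence count:
 *
 *     seqlock_write_lock(&timers_state.vm_clock_seqlock);   // writer, BQL held
 *     timers_state.cpu_clock_offset = ...;
 *     seqlock_write_unlock(&timers_state.vm_clock_seqlock);
 *
 *     do {                                                  // reader, no BQL
 *         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
 *         ... read the protected fields ...
 *     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
 *
 * cpu_get_icount() and cpu_get_clock() below are the actual readers.
 */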

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
    if (cpu) {
        if (!cpu_can_do_io(cpu)) {
            fprintf(stderr, "Bad clock read\n");
        }
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
    }
    return timers_state.qemu_icount_bias + (icount << icount_time_shift);
}

int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_real_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non increasing ticks may happen if the host uses
           software suspend.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t ticks;

    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();
    }

    return ticks;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing really protected by seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped.  You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing really protected by seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_real_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
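
/* The offset bookkeeping above keeps cpu_get_ticks() continuous across a
 * stop/start cycle.  For example, if the host cycle counter reads T when
 * the VM stops and T' when it restarts, disabling adds T to the offset
 * and enabling subtracts T', so the guest never observes the stopped
 * interval T' - T.
 */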

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
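
/* To make the feedback above concrete: delta = cur_icount - cur_time, so a
 * positive delta means virtual time has outrun real time; decrementing
 * icount_time_shift makes each instruction account for fewer nanoseconds,
 * slowing the virtual clock.  A lagging guest gets the shift incremented,
 * up to MAX_ICOUNT_SHIFT.  The bias is then recomputed so the virtual
 * clock does not jump when the shift changes.
 */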

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
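
/* qemu_icount_round() is a ceiling division by the per-instruction cost.
 * For example, with icount_time_shift == 3 (8 ns per instruction) a
 * 1000 ns deadline rounds up to (1000 + 7) >> 3 = 125 instructions.
 */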

static void icount_warp_rt(void *opaque)
{
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    if (atomic_read(&vm_clock_warp_start) == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock_locked();
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = cur_time - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    assert(qtest_enabled());
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_clock_warp(QEMUClockType type)
{
    int64_t clock;
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
     * This ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest QEMU_CLOCK_VIRTUAL timer.
     */
    icount_warp_rt(NULL);
    timer_del(icount_warp_timer);
    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
         * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
         * event.  Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
         * after some "real" time, (related to the time left until the next
         * event) has passed.  The QEMU_CLOCK_REALTIME timer will do this.
         * This avoids that the warps are visible externally; for example,
         * you will not be sending network packets continuously instead of
         * every 100ms.
         */
        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
            vm_clock_warp_start = clock;
        }
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);
        timer_mod_anticipate(icount_warp_timer, clock + deadline);
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
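
/* A sketch of the resulting timeline: suppose all vCPUs go idle at real
 * time R with the next QEMU_CLOCK_VIRTUAL timer D ns away.  The warp timer
 * fires at R + D; icount_warp_rt() then adds the elapsed real time (capped
 * by the real/virtual skew in adaptive mode) to qemu_icount_bias, so the
 * virtual clock jumps forward just in time for the timer to fire instead
 * of crawling at instruction-execution speed while nothing runs.
 */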

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    }
};

void configure_icount(const char *option)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    if (!option) {
        return;
    }

    icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
                                     icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        icount_time_shift = strtol(option, NULL, 0);
        use_icount = 1;
        return;
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}
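
/* Usage, for reference: this is reached from the -icount command line
 * option.  "-icount N" fixes the cost of an instruction at 2^N ns
 * (use_icount == 1), while "-icount auto" starts from the 2^3 ns guess
 * above and lets the two timers re-tune the shift at runtime
 * (use_icount == 2).
 */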

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

static void cpu_signal(int sig)
{
    if (current_cpu) {
        cpu_exit(current_cpu);
    }
    exit_request = 1;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static bool iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}
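
/* A hypothetical caller, to illustrate the contract: the work item lives
 * on run_on_cpu()'s stack frame, which is safe because the function does
 * not return until wi.done is set by flush_queued_work() on the target
 * vCPU thread.
 *
 *     static void set_halted(void *data)
 *     {
 *         CPUState *cpu = data;
 *         cpu->halted = 1;
 *     }
 *
 *     run_on_cpu(cpu, set_halted, cpu);   // blocks until set_halted has run
 */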

void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;

    qemu_cpu_kick(cpu);
}

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle.  */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (QTAILQ_FIRST(&cpus)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }
}

void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
#else
    abort();
#endif
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

static bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}

void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                 cpu->cpu_index);
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

static int tcg_cpu_exec(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                    + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                        + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;
    }
    return ret;
}
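
/* Example of the budget split above: a 1,000,000 ns deadline with
 * icount_time_shift == 3 rounds to 125,000 instructions.  Only the low
 * 16 bits fit in icount_decr.u16.low, so decr = 0xffff (65,535) and the
 * remaining 59,465 go to icount_extra; the translated code decrements
 * u16.low and drops back out of the execution loop when it reaches zero.
 */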

static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 " specified", addr);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size)
            l = size;
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(cpu->apic_state);
        }
    }
#elif defined(TARGET_S390X)
    CPUState *cs;
    S390CPU *cpu;

    CPU_FOREACH(cs) {
        cpu = S390_CPU(cs);
        if (cpu->env.cpu_num == monitor_get_cpu_index()) {
            if (s390_cpu_restart(S390_CPU(cs)) == -1) {
                error_set(errp, QERR_UNSUPPORTED);
                return;
            }
            break;
        }
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}