4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "sysemu/sysemu.h"
31 #include "exec/gdbstub.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/kvm.h"
34 #include "qmp-commands.h"
36 #include "qemu/thread.h"
37 #include "sysemu/cpus.h"
38 #include "sysemu/qtest.h"
39 #include "qemu/main-loop.h"
40 #include "qemu/bitmap.h"
41 #include "qemu/seqlock.h"
42 #include "qapi-event.h"
45 #include "qemu/compatfd.h"
50 #include <sys/prctl.h>
53 #define PR_MCE_KILL 33
56 #ifndef PR_MCE_KILL_SET
57 #define PR_MCE_KILL_SET 1
60 #ifndef PR_MCE_KILL_EARLY
61 #define PR_MCE_KILL_EARLY 1
64 #endif /* CONFIG_LINUX */
66 static CPUState
*next_cpu
;
68 bool cpu_is_stopped(CPUState
*cpu
)
70 return cpu
->stopped
|| !runstate_is_running();
73 static bool cpu_thread_is_idle(CPUState
*cpu
)
75 if (cpu
->stop
|| cpu
->queued_work_first
) {
78 if (cpu_is_stopped(cpu
)) {
81 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
82 kvm_halt_in_kernel()) {
88 static bool all_cpu_threads_idle(void)
93 if (!cpu_thread_is_idle(cpu
)) {
100 /***********************************************************/
101 /* guest cycle counter */
103 /* Protected by TimersState seqlock */
105 static int64_t vm_clock_warp_start
= -1;
106 /* Conversion factor from emulated instructions to virtual clock ticks. */
107 static int icount_time_shift
;
108 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
109 #define MAX_ICOUNT_SHIFT 10
111 static QEMUTimer
*icount_rt_timer
;
112 static QEMUTimer
*icount_vm_timer
;
113 static QEMUTimer
*icount_warp_timer
;
115 typedef struct TimersState
{
116 /* Protected by BQL. */
117 int64_t cpu_ticks_prev
;
118 int64_t cpu_ticks_offset
;
120 /* cpu_clock_offset can be read out of BQL, so protect it with
123 QemuSeqLock vm_clock_seqlock
;
124 int64_t cpu_clock_offset
;
125 int32_t cpu_ticks_enabled
;
128 /* Compensate for varying guest execution speed. */
129 int64_t qemu_icount_bias
;
130 /* Only written by TCG thread */
134 static TimersState timers_state
;
136 /* Return the virtual CPU time, based on the instruction counter. */
137 static int64_t cpu_get_icount_locked(void)
140 CPUState
*cpu
= current_cpu
;
142 icount
= timers_state
.qemu_icount
;
144 if (!cpu_can_do_io(cpu
)) {
145 fprintf(stderr
, "Bad clock read\n");
147 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
149 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
152 int64_t cpu_get_icount(void)
158 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
159 icount
= cpu_get_icount_locked();
160 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
165 int64_t cpu_icount_to_ns(int64_t icount
)
167 return icount
<< icount_time_shift
;
170 /* return the host CPU cycle counter and handle stop/restart */
171 /* Caller must hold the BQL */
172 int64_t cpu_get_ticks(void)
177 return cpu_get_icount();
180 ticks
= timers_state
.cpu_ticks_offset
;
181 if (timers_state
.cpu_ticks_enabled
) {
182 ticks
+= cpu_get_real_ticks();
185 if (timers_state
.cpu_ticks_prev
> ticks
) {
186 /* Note: non increasing ticks may happen if the host uses
188 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
189 ticks
= timers_state
.cpu_ticks_prev
;
192 timers_state
.cpu_ticks_prev
= ticks
;
196 static int64_t cpu_get_clock_locked(void)
200 ticks
= timers_state
.cpu_clock_offset
;
201 if (timers_state
.cpu_ticks_enabled
) {
202 ticks
+= get_clock();
208 /* return the host CPU monotonic timer and handle stop/restart */
209 int64_t cpu_get_clock(void)
215 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
216 ti
= cpu_get_clock_locked();
217 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
222 /* return the offset between the host clock and virtual CPU clock */
223 int64_t cpu_get_clock_offset(void)
229 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
230 ti
= timers_state
.cpu_clock_offset
;
231 if (!timers_state
.cpu_ticks_enabled
) {
234 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
239 /* enable cpu_get_ticks()
240 * Caller must hold the BQL, which serves as a mutex for vm_clock_seqlock.
242 void cpu_enable_ticks(void)
244 /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
245 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
246 if (!timers_state
.cpu_ticks_enabled
) {
247 timers_state
.cpu_ticks_offset
-= cpu_get_real_ticks();
248 timers_state
.cpu_clock_offset
-= get_clock();
249 timers_state
.cpu_ticks_enabled
= 1;
251 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
254 /* disable cpu_get_ticks() : the clock is stopped. You must not call
255 * cpu_get_ticks() after that.
256 * Caller must hold the BQL, which serves as a mutex for vm_clock_seqlock.
258 void cpu_disable_ticks(void)
260 /* Here, the real thing protected by the seqlock is cpu_clock_offset. */
261 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
262 if (timers_state
.cpu_ticks_enabled
) {
263 timers_state
.cpu_ticks_offset
+= cpu_get_real_ticks();
264 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
265 timers_state
.cpu_ticks_enabled
= 0;
267 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
270 /* Correlation between real and virtual time is always going to be
271 fairly approximate, so ignore small variation.
272 When the guest is idle real and virtual time will be aligned in
274 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
276 static void icount_adjust(void)
282 /* Protected by TimersState mutex. */
283 static int64_t last_delta
;
285 /* If the VM is not running, then do nothing. */
286 if (!runstate_is_running()) {
290 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
291 cur_time
= cpu_get_clock_locked();
292 cur_icount
= cpu_get_icount_locked();
294 delta
= cur_icount
- cur_time
;
295 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
297 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
298 && icount_time_shift
> 0) {
299 /* The guest is getting too far ahead. Slow time down. */
303 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
304 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
305 /* The guest is getting too far behind. Speed time up. */
309 timers_state
.qemu_icount_bias
= cur_icount
310 - (timers_state
.qemu_icount
<< icount_time_shift
);
311 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
314 static void icount_adjust_rt(void *opaque
)
316 timer_mod(icount_rt_timer
,
317 qemu_clock_get_ms(QEMU_CLOCK_REALTIME
) + 1000);
321 static void icount_adjust_vm(void *opaque
)
323 timer_mod(icount_vm_timer
,
324 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
325 get_ticks_per_sec() / 10);
329 static int64_t qemu_icount_round(int64_t count
)
331 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
334 static void icount_warp_rt(void *opaque
)
336 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
337 * changes from -1 to another value, so the race here is okay.
339 if (atomic_read(&vm_clock_warp_start
) == -1) {
343 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
344 if (runstate_is_running()) {
345 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
348 warp_delta
= clock
- vm_clock_warp_start
;
349 if (use_icount
== 2) {
351 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
352 * far ahead of real time.
354 int64_t cur_time
= cpu_get_clock_locked();
355 int64_t cur_icount
= cpu_get_icount_locked();
356 int64_t delta
= cur_time
- cur_icount
;
357 warp_delta
= MIN(warp_delta
, delta
);
359 timers_state
.qemu_icount_bias
+= warp_delta
;
361 vm_clock_warp_start
= -1;
362 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
364 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
365 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
369 void qtest_clock_warp(int64_t dest
)
371 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
372 assert(qtest_enabled());
373 while (clock
< dest
) {
374 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
375 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
376 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
377 timers_state
.qemu_icount_bias
+= warp
;
378 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
380 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
381 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
383 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
386 void qemu_clock_warp(QEMUClockType type
)
392 * There are too many global variables to make the "warp" behavior
393 * applicable to other clocks. But a clock argument removes the
394 * need for if statements all over the place.
396 if (type
!= QEMU_CLOCK_VIRTUAL
|| !use_icount
) {
401 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
402 * This ensures that the deadline for the timer is computed correctly below.
403 * This also makes sure that the insn counter is synchronized before the
404 * CPU starts running, in case the CPU is woken by an event other than
405 * the earliest QEMU_CLOCK_VIRTUAL timer.
407 icount_warp_rt(NULL
);
408 timer_del(icount_warp_timer
);
409 if (!all_cpu_threads_idle()) {
413 if (qtest_enabled()) {
414 /* When testing, qtest commands advance icount. */
418 /* We want to use the earliest deadline from ALL vm_clocks */
419 clock
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
420 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
427 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
428 * sleep. Otherwise, the CPU might be waiting for a future timer
429 * interrupt to wake it up, but the interrupt never comes because
430 * the vCPU isn't running any insns and thus doesn't advance the
431 * QEMU_CLOCK_VIRTUAL.
433 * An extreme solution for this problem would be to never let VCPUs
434 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
435 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
436 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
437 * after some "real" time (related to the time left until the next
438 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
439 * This avoids that the warps are visible externally; for example,
440 * you will not be sending network packets continuously instead of
443 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
444 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
445 vm_clock_warp_start
= clock
;
447 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
448 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
449 } else if (deadline
== 0) {
450 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
454 static bool icount_state_needed(void *opaque
)
460 * This is a subsection for icount migration.
462 static const VMStateDescription icount_vmstate_timers
= {
463 .name
= "timer/icount",
465 .minimum_version_id
= 1,
466 .fields
= (VMStateField
[]) {
467 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
468 VMSTATE_INT64(qemu_icount
, TimersState
),
469 VMSTATE_END_OF_LIST()
473 static const VMStateDescription vmstate_timers
= {
476 .minimum_version_id
= 1,
477 .fields
= (VMStateField
[]) {
478 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
479 VMSTATE_INT64(dummy
, TimersState
),
480 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
481 VMSTATE_END_OF_LIST()
483 .subsections
= (VMStateSubsection
[]) {
485 .vmsd
= &icount_vmstate_timers
,
486 .needed
= icount_state_needed
,
493 void configure_icount(QemuOpts
*opts
, Error
**errp
)
496 char *rem_str
= NULL
;
498 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
499 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
500 option
= qemu_opt_get(opts
, "shift");
502 if (qemu_opt_get(opts
, "align") != NULL
) {
503 error_setg(errp
, "Please specify shift option when using align");
507 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
508 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_REALTIME
,
509 icount_warp_rt
, NULL
);
510 if (strcmp(option
, "auto") != 0) {
512 icount_time_shift
= strtol(option
, &rem_str
, 0);
513 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
514 error_setg(errp
, "icount: Invalid shift value");
518 } else if (icount_align_option
) {
519 error_setg(errp
, "shift=auto and align=on are incompatible");
524 /* 125MIPS seems a reasonable initial guess at the guest speed.
525 It will be corrected fairly quickly anyway. */
526 icount_time_shift
= 3;
528 /* Have both realtime and virtual time triggers for speed adjustment.
529 The realtime trigger catches emulated time passing too slowly,
530 the virtual time trigger catches emulated time passing too fast.
531 Realtime triggers occur even when idle, so use them less frequently
533 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_REALTIME
,
534 icount_adjust_rt
, NULL
);
535 timer_mod(icount_rt_timer
,
536 qemu_clock_get_ms(QEMU_CLOCK_REALTIME
) + 1000);
537 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
538 icount_adjust_vm
, NULL
);
539 timer_mod(icount_vm_timer
,
540 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
541 get_ticks_per_sec() / 10);
544 /***********************************************************/
545 void hw_error(const char *fmt
, ...)
551 fprintf(stderr
, "qemu: hardware error: ");
552 vfprintf(stderr
, fmt
, ap
);
553 fprintf(stderr
, "\n");
555 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
556 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
562 void cpu_synchronize_all_states(void)
567 cpu_synchronize_state(cpu
);
571 void cpu_synchronize_all_post_reset(void)
576 cpu_synchronize_post_reset(cpu
);
580 void cpu_synchronize_all_post_init(void)
585 cpu_synchronize_post_init(cpu
);
589 static int do_vm_stop(RunState state
)
593 if (runstate_is_running()) {
597 vm_state_notify(0, state
);
598 qapi_event_send_stop(&error_abort
);
602 ret
= bdrv_flush_all();
607 static bool cpu_can_run(CPUState
*cpu
)
612 if (cpu_is_stopped(cpu
)) {
618 static void cpu_handle_guest_debug(CPUState
*cpu
)
620 gdb_set_stop_cpu(cpu
);
621 qemu_system_debug_request();
625 static void cpu_signal(int sig
)
628 cpu_exit(current_cpu
);
634 static void sigbus_reraise(void)
637 struct sigaction action
;
639 memset(&action
, 0, sizeof(action
));
640 action
.sa_handler
= SIG_DFL
;
641 if (!sigaction(SIGBUS
, &action
, NULL
)) {
644 sigaddset(&set
, SIGBUS
);
645 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
647 perror("Failed to re-raise SIGBUS!\n");
651 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
654 if (kvm_on_sigbus(siginfo
->ssi_code
,
655 (void *)(intptr_t)siginfo
->ssi_addr
)) {
660 static void qemu_init_sigbus(void)
662 struct sigaction action
;
664 memset(&action
, 0, sizeof(action
));
665 action
.sa_flags
= SA_SIGINFO
;
666 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
667 sigaction(SIGBUS
, &action
, NULL
);
669 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
672 static void qemu_kvm_eat_signals(CPUState
*cpu
)
674 struct timespec ts
= { 0, 0 };
680 sigemptyset(&waitset
);
681 sigaddset(&waitset
, SIG_IPI
);
682 sigaddset(&waitset
, SIGBUS
);
685 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
686 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
687 perror("sigtimedwait");
693 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
701 r
= sigpending(&chkset
);
703 perror("sigpending");
706 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
709 #else /* !CONFIG_LINUX */
711 static void qemu_init_sigbus(void)
715 static void qemu_kvm_eat_signals(CPUState
*cpu
)
718 #endif /* !CONFIG_LINUX */
/* Intentionally empty signal handler; installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals() below. */
static void dummy_signal(int sig)
{
}
725 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
729 struct sigaction sigact
;
731 memset(&sigact
, 0, sizeof(sigact
));
732 sigact
.sa_handler
= dummy_signal
;
733 sigaction(SIG_IPI
, &sigact
, NULL
);
735 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
736 sigdelset(&set
, SIG_IPI
);
737 sigdelset(&set
, SIGBUS
);
738 r
= kvm_set_signal_mask(cpu
, &set
);
740 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
745 static void qemu_tcg_init_cpu_signals(void)
748 struct sigaction sigact
;
750 memset(&sigact
, 0, sizeof(sigact
));
751 sigact
.sa_handler
= cpu_signal
;
752 sigaction(SIG_IPI
, &sigact
, NULL
);
755 sigaddset(&set
, SIG_IPI
);
756 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
760 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
765 static void qemu_tcg_init_cpu_signals(void)
770 static QemuMutex qemu_global_mutex
;
771 static QemuCond qemu_io_proceeded_cond
;
772 static bool iothread_requesting_mutex
;
774 static QemuThread io_thread
;
776 static QemuThread
*tcg_cpu_thread
;
777 static QemuCond
*tcg_halt_cond
;
780 static QemuCond qemu_cpu_cond
;
782 static QemuCond qemu_pause_cond
;
783 static QemuCond qemu_work_cond
;
785 void qemu_init_cpu_loop(void)
788 qemu_cond_init(&qemu_cpu_cond
);
789 qemu_cond_init(&qemu_pause_cond
);
790 qemu_cond_init(&qemu_work_cond
);
791 qemu_cond_init(&qemu_io_proceeded_cond
);
792 qemu_mutex_init(&qemu_global_mutex
);
794 qemu_thread_get_self(&io_thread
);
797 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
799 struct qemu_work_item wi
;
801 if (qemu_cpu_is_self(cpu
)) {
809 if (cpu
->queued_work_first
== NULL
) {
810 cpu
->queued_work_first
= &wi
;
812 cpu
->queued_work_last
->next
= &wi
;
814 cpu
->queued_work_last
= &wi
;
820 CPUState
*self_cpu
= current_cpu
;
822 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
823 current_cpu
= self_cpu
;
827 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
829 struct qemu_work_item
*wi
;
831 if (qemu_cpu_is_self(cpu
)) {
836 wi
= g_malloc0(sizeof(struct qemu_work_item
));
840 if (cpu
->queued_work_first
== NULL
) {
841 cpu
->queued_work_first
= wi
;
843 cpu
->queued_work_last
->next
= wi
;
845 cpu
->queued_work_last
= wi
;
852 static void flush_queued_work(CPUState
*cpu
)
854 struct qemu_work_item
*wi
;
856 if (cpu
->queued_work_first
== NULL
) {
860 while ((wi
= cpu
->queued_work_first
)) {
861 cpu
->queued_work_first
= wi
->next
;
868 cpu
->queued_work_last
= NULL
;
869 qemu_cond_broadcast(&qemu_work_cond
);
872 static void qemu_wait_io_event_common(CPUState
*cpu
)
877 qemu_cond_signal(&qemu_pause_cond
);
879 flush_queued_work(cpu
);
880 cpu
->thread_kicked
= false;
883 static void qemu_tcg_wait_io_event(void)
887 while (all_cpu_threads_idle()) {
888 /* Start accounting real time to the virtual clock if the CPUs
890 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
891 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
894 while (iothread_requesting_mutex
) {
895 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
899 qemu_wait_io_event_common(cpu
);
903 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
905 while (cpu_thread_is_idle(cpu
)) {
906 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
909 qemu_kvm_eat_signals(cpu
);
910 qemu_wait_io_event_common(cpu
);
913 static void *qemu_kvm_cpu_thread_fn(void *arg
)
918 qemu_mutex_lock(&qemu_global_mutex
);
919 qemu_thread_get_self(cpu
->thread
);
920 cpu
->thread_id
= qemu_get_thread_id();
923 r
= kvm_init_vcpu(cpu
);
925 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
929 qemu_kvm_init_cpu_signals(cpu
);
931 /* signal CPU creation */
933 qemu_cond_signal(&qemu_cpu_cond
);
936 if (cpu_can_run(cpu
)) {
937 r
= kvm_cpu_exec(cpu
);
938 if (r
== EXCP_DEBUG
) {
939 cpu_handle_guest_debug(cpu
);
942 qemu_kvm_wait_io_event(cpu
);
948 static void *qemu_dummy_cpu_thread_fn(void *arg
)
951 fprintf(stderr
, "qtest is not supported under Windows\n");
958 qemu_mutex_lock_iothread();
959 qemu_thread_get_self(cpu
->thread
);
960 cpu
->thread_id
= qemu_get_thread_id();
962 sigemptyset(&waitset
);
963 sigaddset(&waitset
, SIG_IPI
);
965 /* signal CPU creation */
967 qemu_cond_signal(&qemu_cpu_cond
);
972 qemu_mutex_unlock_iothread();
975 r
= sigwait(&waitset
, &sig
);
976 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
981 qemu_mutex_lock_iothread();
983 qemu_wait_io_event_common(cpu
);
990 static void tcg_exec_all(void);
992 static void *qemu_tcg_cpu_thread_fn(void *arg
)
996 qemu_tcg_init_cpu_signals();
997 qemu_thread_get_self(cpu
->thread
);
999 qemu_mutex_lock(&qemu_global_mutex
);
1001 cpu
->thread_id
= qemu_get_thread_id();
1002 cpu
->created
= true;
1004 qemu_cond_signal(&qemu_cpu_cond
);
1006 /* wait for initial kick-off after machine start */
1007 while (QTAILQ_FIRST(&cpus
)->stopped
) {
1008 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
1010 /* process any pending work */
1012 qemu_wait_io_event_common(cpu
);
1020 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1022 if (deadline
== 0) {
1023 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1026 qemu_tcg_wait_io_event();
1032 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1037 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1039 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1043 if (!qemu_cpu_is_self(cpu
)) {
1046 if (SuspendThread(cpu
->hThread
) == (DWORD
)-1) {
1047 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1052 /* On multi-core systems, we are not sure that the thread is actually
1053 * suspended until we can get the context.
1055 tcgContext
.ContextFlags
= CONTEXT_CONTROL
;
1056 while (GetThreadContext(cpu
->hThread
, &tcgContext
) != 0) {
1062 if (ResumeThread(cpu
->hThread
) == (DWORD
)-1) {
1063 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1071 void qemu_cpu_kick(CPUState
*cpu
)
1073 qemu_cond_broadcast(cpu
->halt_cond
);
1074 if (!tcg_enabled() && !cpu
->thread_kicked
) {
1075 qemu_cpu_kick_thread(cpu
);
1076 cpu
->thread_kicked
= true;
1080 void qemu_cpu_kick_self(void)
1083 assert(current_cpu
);
1085 if (!current_cpu
->thread_kicked
) {
1086 qemu_cpu_kick_thread(current_cpu
);
1087 current_cpu
->thread_kicked
= true;
1094 bool qemu_cpu_is_self(CPUState
*cpu
)
1096 return qemu_thread_is_self(cpu
->thread
);
1099 static bool qemu_in_vcpu_thread(void)
1101 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
1104 void qemu_mutex_lock_iothread(void)
1106 if (!tcg_enabled()) {
1107 qemu_mutex_lock(&qemu_global_mutex
);
1109 iothread_requesting_mutex
= true;
1110 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1111 qemu_cpu_kick_thread(first_cpu
);
1112 qemu_mutex_lock(&qemu_global_mutex
);
1114 iothread_requesting_mutex
= false;
1115 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
1119 void qemu_mutex_unlock_iothread(void)
1121 qemu_mutex_unlock(&qemu_global_mutex
);
1124 static int all_vcpus_paused(void)
1129 if (!cpu
->stopped
) {
1137 void pause_all_vcpus(void)
1141 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1147 if (qemu_in_vcpu_thread()) {
1149 if (!kvm_enabled()) {
1152 cpu
->stopped
= true;
1158 while (!all_vcpus_paused()) {
1159 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1166 void cpu_resume(CPUState
*cpu
)
1169 cpu
->stopped
= false;
1173 void resume_all_vcpus(void)
1177 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1183 /* For temporary buffers for forming a name */
1184 #define VCPU_THREAD_NAME_SIZE 16
1186 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1188 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1190 tcg_cpu_address_space_init(cpu
, cpu
->as
);
1192 /* share a single thread for all cpus with TCG */
1193 if (!tcg_cpu_thread
) {
1194 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1195 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1196 qemu_cond_init(cpu
->halt_cond
);
1197 tcg_halt_cond
= cpu
->halt_cond
;
1198 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1200 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1201 cpu
, QEMU_THREAD_JOINABLE
);
1203 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1205 while (!cpu
->created
) {
1206 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1208 tcg_cpu_thread
= cpu
->thread
;
1210 cpu
->thread
= tcg_cpu_thread
;
1211 cpu
->halt_cond
= tcg_halt_cond
;
1215 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1217 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1219 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1220 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1221 qemu_cond_init(cpu
->halt_cond
);
1222 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1224 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1225 cpu
, QEMU_THREAD_JOINABLE
);
1226 while (!cpu
->created
) {
1227 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1231 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1233 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1235 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1236 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1237 qemu_cond_init(cpu
->halt_cond
);
1238 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1240 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1241 QEMU_THREAD_JOINABLE
);
1242 while (!cpu
->created
) {
1243 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1247 void qemu_init_vcpu(CPUState
*cpu
)
1249 cpu
->nr_cores
= smp_cores
;
1250 cpu
->nr_threads
= smp_threads
;
1251 cpu
->stopped
= true;
1252 if (kvm_enabled()) {
1253 qemu_kvm_start_vcpu(cpu
);
1254 } else if (tcg_enabled()) {
1255 qemu_tcg_init_vcpu(cpu
);
1257 qemu_dummy_start_vcpu(cpu
);
1261 void cpu_stop_current(void)
1264 current_cpu
->stop
= false;
1265 current_cpu
->stopped
= true;
1266 cpu_exit(current_cpu
);
1267 qemu_cond_signal(&qemu_pause_cond
);
1271 int vm_stop(RunState state
)
1273 if (qemu_in_vcpu_thread()) {
1274 qemu_system_vmstop_request_prepare();
1275 qemu_system_vmstop_request(state
);
1277 * FIXME: should not return to device code in case
1278 * vm_stop() has been requested.
1284 return do_vm_stop(state
);
1287 /* does a state transition even if the VM is already stopped,
1288 current state is forgotten forever */
1289 int vm_stop_force_state(RunState state
)
1291 if (runstate_is_running()) {
1292 return vm_stop(state
);
1294 runstate_set(state
);
1295 /* Make sure to return an error if the flush in a previous vm_stop()
1297 return bdrv_flush_all();
1301 static int tcg_cpu_exec(CPUArchState
*env
)
1303 CPUState
*cpu
= ENV_GET_CPU(env
);
1305 #ifdef CONFIG_PROFILER
1309 #ifdef CONFIG_PROFILER
1310 ti
= profile_getclock();
1316 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1317 + cpu
->icount_extra
);
1318 cpu
->icount_decr
.u16
.low
= 0;
1319 cpu
->icount_extra
= 0;
1320 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1322 /* Maintain prior (possibly buggy) behaviour where if no deadline
1323 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1324 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1327 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1328 deadline
= INT32_MAX
;
1331 count
= qemu_icount_round(deadline
);
1332 timers_state
.qemu_icount
+= count
;
1333 decr
= (count
> 0xffff) ? 0xffff : count
;
1335 cpu
->icount_decr
.u16
.low
= decr
;
1336 cpu
->icount_extra
= count
;
1338 ret
= cpu_exec(env
);
1339 #ifdef CONFIG_PROFILER
1340 qemu_time
+= profile_getclock() - ti
;
1343 /* Fold pending instructions back into the
1344 instruction counter, and clear the interrupt flag. */
1345 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1346 + cpu
->icount_extra
);
1347 cpu
->icount_decr
.u32
= 0;
1348 cpu
->icount_extra
= 0;
1353 static void tcg_exec_all(void)
1357 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1358 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
1360 if (next_cpu
== NULL
) {
1361 next_cpu
= first_cpu
;
1363 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1364 CPUState
*cpu
= next_cpu
;
1365 CPUArchState
*env
= cpu
->env_ptr
;
1367 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1368 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1370 if (cpu_can_run(cpu
)) {
1371 r
= tcg_cpu_exec(env
);
1372 if (r
== EXCP_DEBUG
) {
1373 cpu_handle_guest_debug(cpu
);
1376 } else if (cpu
->stop
|| cpu
->stopped
) {
1383 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1385 /* XXX: implement xxx_cpu_list for targets that still miss it */
1386 #if defined(cpu_list)
1387 cpu_list(f
, cpu_fprintf
);
1391 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1393 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1398 #if defined(TARGET_I386)
1399 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1400 CPUX86State
*env
= &x86_cpu
->env
;
1401 #elif defined(TARGET_PPC)
1402 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1403 CPUPPCState
*env
= &ppc_cpu
->env
;
1404 #elif defined(TARGET_SPARC)
1405 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1406 CPUSPARCState
*env
= &sparc_cpu
->env
;
1407 #elif defined(TARGET_MIPS)
1408 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1409 CPUMIPSState
*env
= &mips_cpu
->env
;
1412 cpu_synchronize_state(cpu
);
1414 info
= g_malloc0(sizeof(*info
));
1415 info
->value
= g_malloc0(sizeof(*info
->value
));
1416 info
->value
->CPU
= cpu
->cpu_index
;
1417 info
->value
->current
= (cpu
== first_cpu
);
1418 info
->value
->halted
= cpu
->halted
;
1419 info
->value
->thread_id
= cpu
->thread_id
;
1420 #if defined(TARGET_I386)
1421 info
->value
->has_pc
= true;
1422 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1423 #elif defined(TARGET_PPC)
1424 info
->value
->has_nip
= true;
1425 info
->value
->nip
= env
->nip
;
1426 #elif defined(TARGET_SPARC)
1427 info
->value
->has_pc
= true;
1428 info
->value
->pc
= env
->pc
;
1429 info
->value
->has_npc
= true;
1430 info
->value
->npc
= env
->npc
;
1431 #elif defined(TARGET_MIPS)
1432 info
->value
->has_PC
= true;
1433 info
->value
->PC
= env
->active_tc
.PC
;
1436 /* XXX: waiting for the qapi to support GSList */
1438 head
= cur_item
= info
;
1440 cur_item
->next
= info
;
1448 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1449 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1460 cpu
= qemu_get_cpu(cpu_index
);
1462 error_set(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1467 f
= fopen(filename
, "wb");
1469 error_setg_file_open(errp
, errno
, filename
);
1477 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1478 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"specified", addr
);
1481 if (fwrite(buf
, 1, l
, f
) != l
) {
1482 error_set(errp
, QERR_IO_ERROR
);
1493 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1500 f
= fopen(filename
, "wb");
1502 error_setg_file_open(errp
, errno
, filename
);
1510 cpu_physical_memory_read(addr
, buf
, l
);
1511 if (fwrite(buf
, 1, l
, f
) != l
) {
1512 error_set(errp
, QERR_IO_ERROR
);
1523 void qmp_inject_nmi(Error
**errp
)
1525 #if defined(TARGET_I386)
1529 X86CPU
*cpu
= X86_CPU(cs
);
1531 if (!cpu
->apic_state
) {
1532 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1534 apic_deliver_nmi(cpu
->apic_state
);
1537 #elif defined(TARGET_S390X)
1543 if (cpu
->env
.cpu_num
== monitor_get_cpu_index()) {
1544 if (s390_cpu_restart(S390_CPU(cs
)) == -1) {
1545 error_set(errp
, QERR_UNSUPPORTED
);
1552 error_set(errp
, QERR_UNSUPPORTED
);