4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "sysemu/sysemu.h"
31 #include "exec/gdbstub.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/kvm.h"
34 #include "qmp-commands.h"
36 #include "qemu/thread.h"
37 #include "sysemu/cpus.h"
38 #include "sysemu/qtest.h"
39 #include "qemu/main-loop.h"
40 #include "qemu/bitmap.h"
41 #include "qemu/seqlock.h"
42 #include "qapi-event.h"
46 #include "qemu/compatfd.h"
51 #include <sys/prctl.h>
54 #define PR_MCE_KILL 33
57 #ifndef PR_MCE_KILL_SET
58 #define PR_MCE_KILL_SET 1
61 #ifndef PR_MCE_KILL_EARLY
62 #define PR_MCE_KILL_EARLY 1
65 #endif /* CONFIG_LINUX */
67 static CPUState
*next_cpu
;
/*
 * Tell whether @cpu counts as stopped: true when the CPU's own 'stopped'
 * flag is set, or when runstate_is_running() reports that the VM is not
 * running.
 */
71 bool cpu_is_stopped(CPUState
*cpu
)
73 return cpu
->stopped
|| !runstate_is_running();
76 static bool cpu_thread_is_idle(CPUState
*cpu
)
78 if (cpu
->stop
|| cpu
->queued_work_first
) {
81 if (cpu_is_stopped(cpu
)) {
84 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
85 kvm_halt_in_kernel()) {
91 static bool all_cpu_threads_idle(void)
96 if (!cpu_thread_is_idle(cpu
)) {
103 /***********************************************************/
104 /* guest cycle counter */
106 /* Protected by TimersState seqlock */
108 static int64_t vm_clock_warp_start
= -1;
109 /* Conversion factor from emulated instructions to virtual clock ticks. */
110 static int icount_time_shift
;
111 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
112 #define MAX_ICOUNT_SHIFT 10
114 static QEMUTimer
*icount_rt_timer
;
115 static QEMUTimer
*icount_vm_timer
;
116 static QEMUTimer
*icount_warp_timer
;
118 typedef struct TimersState
{
119 /* Protected by BQL. */
120 int64_t cpu_ticks_prev
;
121 int64_t cpu_ticks_offset
;
123 /* cpu_clock_offset can be read out of BQL, so protect it with
126 QemuSeqLock vm_clock_seqlock
;
127 int64_t cpu_clock_offset
;
128 int32_t cpu_ticks_enabled
;
131 /* Compensate for varying guest execution speed. */
132 int64_t qemu_icount_bias
;
133 /* Only written by TCG thread */
137 static TimersState timers_state
;
139 int64_t cpu_get_icount_raw(void)
142 CPUState
*cpu
= current_cpu
;
144 icount
= timers_state
.qemu_icount
;
146 if (!cpu_can_do_io(cpu
)) {
147 fprintf(stderr
, "Bad icount read\n");
150 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
155 /* Return the virtual CPU time, based on the instruction counter:
 * the raw count from cpu_get_icount_raw() converted with
 * cpu_icount_to_ns(), plus timers_state.qemu_icount_bias.
 * The callers visible in this file invoke it inside a vm_clock_seqlock
 * read section or with the seqlock write-locked. */
156 static int64_t cpu_get_icount_locked(void)
158 int64_t icount
= cpu_get_icount_raw();
159 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
162 int64_t cpu_get_icount(void)
168 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
169 icount
= cpu_get_icount_locked();
170 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
/*
 * Convert an instruction count into virtual clock ticks by shifting it left
 * by icount_time_shift, i.e. multiplying by 2^icount_time_shift.
 */
175 int64_t cpu_icount_to_ns(int64_t icount
)
177 return icount
<< icount_time_shift
;
180 /* return the host CPU cycle counter and handle stop/restart */
181 /* Caller must hold the BQL */
182 int64_t cpu_get_ticks(void)
187 return cpu_get_icount();
190 ticks
= timers_state
.cpu_ticks_offset
;
191 if (timers_state
.cpu_ticks_enabled
) {
192 ticks
+= cpu_get_real_ticks();
195 if (timers_state
.cpu_ticks_prev
> ticks
) {
196 /* Note: non increasing ticks may happen if the host uses
198 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
199 ticks
= timers_state
.cpu_ticks_prev
;
202 timers_state
.cpu_ticks_prev
= ticks
;
206 static int64_t cpu_get_clock_locked(void)
210 ticks
= timers_state
.cpu_clock_offset
;
211 if (timers_state
.cpu_ticks_enabled
) {
212 ticks
+= get_clock();
218 /* return the host CPU monotonic timer and handle stop/restart */
219 int64_t cpu_get_clock(void)
225 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
226 ti
= cpu_get_clock_locked();
227 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
232 /* return the offset between the host clock and virtual CPU clock */
233 int64_t cpu_get_clock_offset(void)
239 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
240 ti
= timers_state
.cpu_clock_offset
;
241 if (!timers_state
.cpu_ticks_enabled
) {
244 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
249 /* enable cpu_get_ticks()
250 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
252 void cpu_enable_ticks(void)
254 /* Here, the real thing protected by seqlock is cpu_clock_offset. */
255 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
256 if (!timers_state
.cpu_ticks_enabled
) {
257 timers_state
.cpu_ticks_offset
-= cpu_get_real_ticks();
258 timers_state
.cpu_clock_offset
-= get_clock();
259 timers_state
.cpu_ticks_enabled
= 1;
261 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
264 /* disable cpu_get_ticks() : the clock is stopped. You must not call
265 * cpu_get_ticks() after that.
266 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
268 void cpu_disable_ticks(void)
270 /* Here, the real thing protected by seqlock is cpu_clock_offset. */
271 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
272 if (timers_state
.cpu_ticks_enabled
) {
273 timers_state
.cpu_ticks_offset
+= cpu_get_real_ticks();
274 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
275 timers_state
.cpu_ticks_enabled
= 0;
277 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
280 /* Correlation between real and virtual time is always going to be
281 fairly approximate, so ignore small variation.
282 When the guest is idle real and virtual time will be aligned in
284 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
286 static void icount_adjust(void)
292 /* Protected by TimersState mutex. */
293 static int64_t last_delta
;
295 /* If the VM is not running, then do nothing. */
296 if (!runstate_is_running()) {
300 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
301 cur_time
= cpu_get_clock_locked();
302 cur_icount
= cpu_get_icount_locked();
304 delta
= cur_icount
- cur_time
;
305 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
307 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
308 && icount_time_shift
> 0) {
309 /* The guest is getting too far ahead. Slow time down. */
313 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
314 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
315 /* The guest is getting too far behind. Speed time up. */
319 timers_state
.qemu_icount_bias
= cur_icount
320 - (timers_state
.qemu_icount
<< icount_time_shift
);
321 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
324 static void icount_adjust_rt(void *opaque
)
326 timer_mod(icount_rt_timer
,
327 qemu_clock_get_ms(QEMU_CLOCK_REALTIME
) + 1000);
331 static void icount_adjust_vm(void *opaque
)
333 timer_mod(icount_vm_timer
,
334 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
335 get_ticks_per_sec() / 10);
/*
 * Divide @count by 2^icount_time_shift, rounding up: (2^shift - 1) is added
 * before the right shift. This is the reverse direction of
 * cpu_icount_to_ns(); the caller visible in this file (tcg_cpu_exec) passes
 * a deadline and adds the result to the instruction counter.
 */
339 static int64_t qemu_icount_round(int64_t count
)
341 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
344 static void icount_warp_rt(void *opaque
)
346 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
347 * changes from -1 to another value, so the race here is okay.
349 if (atomic_read(&vm_clock_warp_start
) == -1) {
353 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
354 if (runstate_is_running()) {
355 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
358 warp_delta
= clock
- vm_clock_warp_start
;
359 if (use_icount
== 2) {
361 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
362 * far ahead of real time.
364 int64_t cur_time
= cpu_get_clock_locked();
365 int64_t cur_icount
= cpu_get_icount_locked();
366 int64_t delta
= cur_time
- cur_icount
;
367 warp_delta
= MIN(warp_delta
, delta
);
369 timers_state
.qemu_icount_bias
+= warp_delta
;
371 vm_clock_warp_start
= -1;
372 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
374 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
375 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
/*
 * Advance QEMU_CLOCK_VIRTUAL until it reaches @dest, running the virtual
 * clock's timers along the way. Asserts that qtest is enabled.
 *
 * @dest: target QEMU_CLOCK_VIRTUAL value, compared against
 *        qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL).
 */
379 void qtest_clock_warp(int64_t dest
)
381 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
382 assert(qtest_enabled());
/* Step forward one chunk at a time until the clock reaches @dest. */
383 while (clock
< dest
) {
384 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
/* Each step is bounded by both the remaining distance and the deadline. */
385 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
/* The bias is only modified with vm_clock_seqlock write-locked. */
386 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
387 timers_state
.qemu_icount_bias
+= warp
;
388 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
/* Run the virtual clock's timers, then re-read the clock for the loop test. */
390 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
391 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
393 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
396 void qemu_clock_warp(QEMUClockType type
)
402 * There are too many global variables to make the "warp" behavior
403 * applicable to other clocks. But a clock argument removes the
404 * need for if statements all over the place.
406 if (type
!= QEMU_CLOCK_VIRTUAL
|| !use_icount
) {
411 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
412 * This ensures that the deadline for the timer is computed correctly below.
413 * This also makes sure that the insn counter is synchronized before the
414 * CPU starts running, in case the CPU is woken by an event other than
415 * the earliest QEMU_CLOCK_VIRTUAL timer.
417 icount_warp_rt(NULL
);
418 timer_del(icount_warp_timer
);
419 if (!all_cpu_threads_idle()) {
423 if (qtest_enabled()) {
424 /* When testing, qtest commands advance icount. */
428 /* We want to use the earliest deadline from ALL vm_clocks */
429 clock
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
430 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
437 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
438 * sleep. Otherwise, the CPU might be waiting for a future timer
439 * interrupt to wake it up, but the interrupt never comes because
440 * the vCPU isn't running any insns and thus doesn't advance the
441 * QEMU_CLOCK_VIRTUAL.
443 * An extreme solution for this problem would be to never let VCPUs
444 * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
445 * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
446 * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
447 * after some "real" time, (related to the time left until the next
448 * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
449 * This avoids that the warps are visible externally; for example,
450 * you will not be sending network packets continuously instead of
453 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
454 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
455 vm_clock_warp_start
= clock
;
457 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
458 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
459 } else if (deadline
== 0) {
460 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
464 static bool icount_state_needed(void *opaque
)
470 * This is a subsection for icount migration.
472 static const VMStateDescription icount_vmstate_timers
= {
473 .name
= "timer/icount",
475 .minimum_version_id
= 1,
476 .fields
= (VMStateField
[]) {
477 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
478 VMSTATE_INT64(qemu_icount
, TimersState
),
479 VMSTATE_END_OF_LIST()
483 static const VMStateDescription vmstate_timers
= {
486 .minimum_version_id
= 1,
487 .fields
= (VMStateField
[]) {
488 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
489 VMSTATE_INT64(dummy
, TimersState
),
490 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
491 VMSTATE_END_OF_LIST()
493 .subsections
= (VMStateSubsection
[]) {
495 .vmsd
= &icount_vmstate_timers
,
496 .needed
= icount_state_needed
,
/*
 * Set up the timer state: initialise timers_state.vm_clock_seqlock (passing
 * NULL as its second argument) and register vmstate_timers with
 * timers_state as the object to save and load.
 */
503 void cpu_ticks_init(void)
505 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
506 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
509 void configure_icount(QemuOpts
*opts
, Error
**errp
)
512 char *rem_str
= NULL
;
514 option
= qemu_opt_get(opts
, "shift");
516 if (qemu_opt_get(opts
, "align") != NULL
) {
517 error_setg(errp
, "Please specify shift option when using align");
521 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
522 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_REALTIME
,
523 icount_warp_rt
, NULL
);
524 if (strcmp(option
, "auto") != 0) {
526 icount_time_shift
= strtol(option
, &rem_str
, 0);
527 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
528 error_setg(errp
, "icount: Invalid shift value");
532 } else if (icount_align_option
) {
533 error_setg(errp
, "shift=auto and align=on are incompatible");
538 /* 125MIPS seems a reasonable initial guess at the guest speed.
539 It will be corrected fairly quickly anyway. */
540 icount_time_shift
= 3;
542 /* Have both realtime and virtual time triggers for speed adjustment.
543 The realtime trigger catches emulated time passing too slowly,
544 the virtual time trigger catches emulated time passing too fast.
545 Realtime triggers occur even when idle, so use them less frequently
547 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_REALTIME
,
548 icount_adjust_rt
, NULL
);
549 timer_mod(icount_rt_timer
,
550 qemu_clock_get_ms(QEMU_CLOCK_REALTIME
) + 1000);
551 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
552 icount_adjust_vm
, NULL
);
553 timer_mod(icount_vm_timer
,
554 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
555 get_ticks_per_sec() / 10);
558 /***********************************************************/
559 void hw_error(const char *fmt
, ...)
565 fprintf(stderr
, "qemu: hardware error: ");
566 vfprintf(stderr
, fmt
, ap
);
567 fprintf(stderr
, "\n");
569 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
570 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
576 void cpu_synchronize_all_states(void)
581 cpu_synchronize_state(cpu
);
585 void cpu_synchronize_all_post_reset(void)
590 cpu_synchronize_post_reset(cpu
);
594 void cpu_synchronize_all_post_init(void)
599 cpu_synchronize_post_init(cpu
);
603 void cpu_clean_all_dirty(void)
608 cpu_clean_state(cpu
);
612 static int do_vm_stop(RunState state
)
616 if (runstate_is_running()) {
620 vm_state_notify(0, state
);
621 qapi_event_send_stop(&error_abort
);
625 ret
= bdrv_flush_all();
630 static bool cpu_can_run(CPUState
*cpu
)
635 if (cpu_is_stopped(cpu
)) {
641 static void cpu_handle_guest_debug(CPUState
*cpu
)
643 gdb_set_stop_cpu(cpu
);
644 qemu_system_debug_request();
648 static void cpu_signal(int sig
)
651 cpu_exit(current_cpu
);
657 static void sigbus_reraise(void)
660 struct sigaction action
;
662 memset(&action
, 0, sizeof(action
));
663 action
.sa_handler
= SIG_DFL
;
664 if (!sigaction(SIGBUS
, &action
, NULL
)) {
667 sigaddset(&set
, SIGBUS
);
668 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
670 perror("Failed to re-raise SIGBUS!\n");
674 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
677 if (kvm_on_sigbus(siginfo
->ssi_code
,
678 (void *)(intptr_t)siginfo
->ssi_addr
)) {
/*
 * Linux variant: install sigbus_handler as the SIGBUS handler and issue
 * prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0).
 * The return values of sigaction() and prctl() are not checked here.
 */
683 static void qemu_init_sigbus(void)
685 struct sigaction action
;
687 memset(&action
, 0, sizeof(action
));
/* SA_SIGINFO selects the three-argument sa_sigaction form of the handler. */
688 action
.sa_flags
= SA_SIGINFO
;
/* sigbus_handler is declared with a different siginfo type, hence the cast. */
689 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
690 sigaction(SIGBUS
, &action
, NULL
);
692 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
695 static void qemu_kvm_eat_signals(CPUState
*cpu
)
697 struct timespec ts
= { 0, 0 };
703 sigemptyset(&waitset
);
704 sigaddset(&waitset
, SIG_IPI
);
705 sigaddset(&waitset
, SIGBUS
);
708 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
709 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
710 perror("sigtimedwait");
716 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
724 r
= sigpending(&chkset
);
726 perror("sigpending");
729 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
732 #else /* !CONFIG_LINUX */
734 static void qemu_init_sigbus(void)
738 static void qemu_kvm_eat_signals(CPUState
*cpu
)
741 #endif /* !CONFIG_LINUX */
/*
 * Signal handler with no visible body. qemu_kvm_init_cpu_signals() installs
 * it as the SIG_IPI handler.
 */
744 static void dummy_signal(int sig
)
748 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
752 struct sigaction sigact
;
754 memset(&sigact
, 0, sizeof(sigact
));
755 sigact
.sa_handler
= dummy_signal
;
756 sigaction(SIG_IPI
, &sigact
, NULL
);
758 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
759 sigdelset(&set
, SIG_IPI
);
760 sigdelset(&set
, SIGBUS
);
761 r
= kvm_set_signal_mask(cpu
, &set
);
763 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
768 static void qemu_tcg_init_cpu_signals(void)
771 struct sigaction sigact
;
773 memset(&sigact
, 0, sizeof(sigact
));
774 sigact
.sa_handler
= cpu_signal
;
775 sigaction(SIG_IPI
, &sigact
, NULL
);
778 sigaddset(&set
, SIG_IPI
);
779 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
783 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
788 static void qemu_tcg_init_cpu_signals(void)
793 static QemuMutex qemu_global_mutex
;
794 static QemuCond qemu_io_proceeded_cond
;
795 static bool iothread_requesting_mutex
;
797 static QemuThread io_thread
;
799 static QemuThread
*tcg_cpu_thread
;
800 static QemuCond
*tcg_halt_cond
;
803 static QemuCond qemu_cpu_cond
;
805 static QemuCond qemu_pause_cond
;
806 static QemuCond qemu_work_cond
;
808 void qemu_init_cpu_loop(void)
811 qemu_cond_init(&qemu_cpu_cond
);
812 qemu_cond_init(&qemu_pause_cond
);
813 qemu_cond_init(&qemu_work_cond
);
814 qemu_cond_init(&qemu_io_proceeded_cond
);
815 qemu_mutex_init(&qemu_global_mutex
);
817 qemu_thread_get_self(&io_thread
);
820 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
822 struct qemu_work_item wi
;
824 if (qemu_cpu_is_self(cpu
)) {
832 if (cpu
->queued_work_first
== NULL
) {
833 cpu
->queued_work_first
= &wi
;
835 cpu
->queued_work_last
->next
= &wi
;
837 cpu
->queued_work_last
= &wi
;
843 CPUState
*self_cpu
= current_cpu
;
845 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
846 current_cpu
= self_cpu
;
850 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
852 struct qemu_work_item
*wi
;
854 if (qemu_cpu_is_self(cpu
)) {
859 wi
= g_malloc0(sizeof(struct qemu_work_item
));
863 if (cpu
->queued_work_first
== NULL
) {
864 cpu
->queued_work_first
= wi
;
866 cpu
->queued_work_last
->next
= wi
;
868 cpu
->queued_work_last
= wi
;
875 static void flush_queued_work(CPUState
*cpu
)
877 struct qemu_work_item
*wi
;
879 if (cpu
->queued_work_first
== NULL
) {
883 while ((wi
= cpu
->queued_work_first
)) {
884 cpu
->queued_work_first
= wi
->next
;
891 cpu
->queued_work_last
= NULL
;
892 qemu_cond_broadcast(&qemu_work_cond
);
895 static void qemu_wait_io_event_common(CPUState
*cpu
)
900 qemu_cond_signal(&qemu_pause_cond
);
902 flush_queued_work(cpu
);
903 cpu
->thread_kicked
= false;
906 static void qemu_tcg_wait_io_event(void)
910 while (all_cpu_threads_idle()) {
911 /* Start accounting real time to the virtual clock if the CPUs
913 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
914 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
917 while (iothread_requesting_mutex
) {
918 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
922 qemu_wait_io_event_common(cpu
);
/*
 * Block the calling KVM vCPU thread while @cpu is idle, then process what
 * woke it up: pending signals first, then the common I/O-event handling.
 */
926 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
/* Sleep on the CPU's halt condition, which is paired with qemu_global_mutex. */
928 while (cpu_thread_is_idle(cpu
)) {
929 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
932 qemu_kvm_eat_signals(cpu
);
933 qemu_wait_io_event_common(cpu
);
936 static void *qemu_kvm_cpu_thread_fn(void *arg
)
941 qemu_mutex_lock(&qemu_global_mutex
);
942 qemu_thread_get_self(cpu
->thread
);
943 cpu
->thread_id
= qemu_get_thread_id();
944 cpu
->exception_index
= -1;
948 r
= kvm_init_vcpu(cpu
);
950 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
954 qemu_kvm_init_cpu_signals(cpu
);
956 /* signal CPU creation */
958 qemu_cond_signal(&qemu_cpu_cond
);
961 if (cpu_can_run(cpu
)) {
962 r
= kvm_cpu_exec(cpu
);
963 if (r
== EXCP_DEBUG
) {
964 cpu_handle_guest_debug(cpu
);
967 qemu_kvm_wait_io_event(cpu
);
973 static void *qemu_dummy_cpu_thread_fn(void *arg
)
976 fprintf(stderr
, "qtest is not supported under Windows\n");
983 qemu_mutex_lock_iothread();
984 qemu_thread_get_self(cpu
->thread
);
985 cpu
->thread_id
= qemu_get_thread_id();
986 cpu
->exception_index
= -1;
989 sigemptyset(&waitset
);
990 sigaddset(&waitset
, SIG_IPI
);
992 /* signal CPU creation */
994 qemu_cond_signal(&qemu_cpu_cond
);
999 qemu_mutex_unlock_iothread();
1002 r
= sigwait(&waitset
, &sig
);
1003 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1008 qemu_mutex_lock_iothread();
1010 qemu_wait_io_event_common(cpu
);
1017 static void tcg_exec_all(void);
1019 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1021 CPUState
*cpu
= arg
;
1023 qemu_tcg_init_cpu_signals();
1024 qemu_thread_get_self(cpu
->thread
);
1026 qemu_mutex_lock(&qemu_global_mutex
);
1028 cpu
->thread_id
= qemu_get_thread_id();
1029 cpu
->created
= true;
1030 cpu
->exception_index
= -1;
1033 qemu_cond_signal(&qemu_cpu_cond
);
1035 /* wait for initial kick-off after machine start */
1036 while (QTAILQ_FIRST(&cpus
)->stopped
) {
1037 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
1039 /* process any pending work */
1041 qemu_wait_io_event_common(cpu
);
1049 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1051 if (deadline
== 0) {
1052 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1055 qemu_tcg_wait_io_event();
1061 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1066 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1068 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1072 if (!qemu_cpu_is_self(cpu
)) {
1075 if (SuspendThread(cpu
->hThread
) == (DWORD
)-1) {
1076 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1081 /* On multi-core systems, we are not sure that the thread is actually
1082 * suspended until we can get the context.
1084 tcgContext
.ContextFlags
= CONTEXT_CONTROL
;
1085 while (GetThreadContext(cpu
->hThread
, &tcgContext
) != 0) {
1091 if (ResumeThread(cpu
->hThread
) == (DWORD
)-1) {
1092 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
/*
 * Wake up @cpu: broadcast on its halt condition and, when TCG is not
 * enabled and the CPU has not already been kicked, kick its thread via
 * qemu_cpu_kick_thread() and record that in cpu->thread_kicked.
 */
1100 void qemu_cpu_kick(CPUState
*cpu
)
1102 qemu_cond_broadcast(cpu
->halt_cond
);
/* thread_kicked prevents a second kick while it is still set. */
1103 if (!tcg_enabled() && !cpu
->thread_kicked
) {
1104 qemu_cpu_kick_thread(cpu
);
1105 cpu
->thread_kicked
= true;
1109 void qemu_cpu_kick_self(void)
1112 assert(current_cpu
);
1114 if (!current_cpu
->thread_kicked
) {
1115 qemu_cpu_kick_thread(current_cpu
);
1116 current_cpu
->thread_kicked
= true;
/*
 * Return the result of qemu_thread_is_self() for @cpu's thread, i.e.
 * whether the caller is running on that thread.
 */
1123 bool qemu_cpu_is_self(CPUState
*cpu
)
1125 return qemu_thread_is_self(cpu
->thread
);
/*
 * Return true when current_cpu is set and the calling thread is that CPU's
 * thread (per qemu_cpu_is_self()).
 */
1128 static bool qemu_in_vcpu_thread(void)
1130 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
/*
 * Acquire qemu_global_mutex.
 *
 * When TCG is not enabled, a plain blocking lock is taken. The remaining
 * statements raise iothread_requesting_mutex, try a non-blocking lock, and
 * on a non-zero trylock result kick first_cpu's thread before blocking on
 * the mutex. The flag is then cleared and qemu_io_proceeded_cond is
 * broadcast; qemu_tcg_wait_io_event() waits on that condition while the
 * flag is set.
 * NOTE(review): the line separating the two branches is not visible here;
 * presumably the second group is the else branch -- confirm.
 * NOTE(review): a non-zero trylock result presumably means the mutex was
 * not obtained -- confirm.
 */
1133 void qemu_mutex_lock_iothread(void)
1135 if (!tcg_enabled()) {
1136 qemu_mutex_lock(&qemu_global_mutex
);
1138 iothread_requesting_mutex
= true;
1139 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1140 qemu_cpu_kick_thread(first_cpu
);
1141 qemu_mutex_lock(&qemu_global_mutex
);
1143 iothread_requesting_mutex
= false;
1144 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
/* Release qemu_global_mutex, the mutex taken by qemu_mutex_lock_iothread(). */
1148 void qemu_mutex_unlock_iothread(void)
1150 qemu_mutex_unlock(&qemu_global_mutex
);
1153 static int all_vcpus_paused(void)
1158 if (!cpu
->stopped
) {
1166 void pause_all_vcpus(void)
1170 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1176 if (qemu_in_vcpu_thread()) {
1178 if (!kvm_enabled()) {
1181 cpu
->stopped
= true;
1187 while (!all_vcpus_paused()) {
1188 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1195 void cpu_resume(CPUState
*cpu
)
1198 cpu
->stopped
= false;
1202 void resume_all_vcpus(void)
1206 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1212 /* For temporary buffers for forming a name */
1213 #define VCPU_THREAD_NAME_SIZE 16
1215 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1217 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1219 tcg_cpu_address_space_init(cpu
, cpu
->as
);
1221 /* share a single thread for all cpus with TCG */
1222 if (!tcg_cpu_thread
) {
1223 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1224 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1225 qemu_cond_init(cpu
->halt_cond
);
1226 tcg_halt_cond
= cpu
->halt_cond
;
1227 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1229 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1230 cpu
, QEMU_THREAD_JOINABLE
);
1232 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1234 while (!cpu
->created
) {
1235 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1237 tcg_cpu_thread
= cpu
->thread
;
1239 cpu
->thread
= tcg_cpu_thread
;
1240 cpu
->halt_cond
= tcg_halt_cond
;
1244 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1246 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1248 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1249 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1250 qemu_cond_init(cpu
->halt_cond
);
1251 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1253 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1254 cpu
, QEMU_THREAD_JOINABLE
);
1255 while (!cpu
->created
) {
1256 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1260 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1262 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1264 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1265 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1266 qemu_cond_init(cpu
->halt_cond
);
1267 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1269 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1270 QEMU_THREAD_JOINABLE
);
1271 while (!cpu
->created
) {
1272 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
/*
 * Initialise @cpu for execution: copy the core and thread counts from
 * smp_cores and smp_threads, mark the CPU as stopped, and start it with the
 * helper matching the active accelerator. KVM uses qemu_kvm_start_vcpu(),
 * TCG uses qemu_tcg_init_vcpu(), and the last call is
 * qemu_dummy_start_vcpu().
 * NOTE(review): the line introducing the last call is not visible here;
 * presumably it is the final else branch -- confirm.
 */
1276 void qemu_init_vcpu(CPUState
*cpu
)
1278 cpu
->nr_cores
= smp_cores
;
1279 cpu
->nr_threads
= smp_threads
;
/* The CPU is marked stopped before its start helper is called. */
1280 cpu
->stopped
= true;
1281 if (kvm_enabled()) {
1282 qemu_kvm_start_vcpu(cpu
);
1283 } else if (tcg_enabled()) {
1284 qemu_tcg_init_vcpu(cpu
);
1286 qemu_dummy_start_vcpu(cpu
);
1290 void cpu_stop_current(void)
1293 current_cpu
->stop
= false;
1294 current_cpu
->stopped
= true;
1295 cpu_exit(current_cpu
);
1296 qemu_cond_signal(&qemu_pause_cond
);
1300 int vm_stop(RunState state
)
1302 if (qemu_in_vcpu_thread()) {
1303 qemu_system_vmstop_request_prepare();
1304 qemu_system_vmstop_request(state
);
1306 * FIXME: should not return to device code in case
1307 * vm_stop() has been requested.
1313 return do_vm_stop(state
);
1316 /* does a state transition even if the VM is already stopped,
1317 current state is forgotten forever */
1318 int vm_stop_force_state(RunState state
)
1320 if (runstate_is_running()) {
1321 return vm_stop(state
);
1323 runstate_set(state
);
1324 /* Make sure to return an error if the flush in a previous vm_stop()
1326 return bdrv_flush_all();
1330 static int tcg_cpu_exec(CPUArchState
*env
)
1332 CPUState
*cpu
= ENV_GET_CPU(env
);
1334 #ifdef CONFIG_PROFILER
1338 #ifdef CONFIG_PROFILER
1339 ti
= profile_getclock();
1345 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1346 + cpu
->icount_extra
);
1347 cpu
->icount_decr
.u16
.low
= 0;
1348 cpu
->icount_extra
= 0;
1349 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1351 /* Maintain prior (possibly buggy) behaviour where if no deadline
1352 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1353 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1356 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1357 deadline
= INT32_MAX
;
1360 count
= qemu_icount_round(deadline
);
1361 timers_state
.qemu_icount
+= count
;
1362 decr
= (count
> 0xffff) ? 0xffff : count
;
1364 cpu
->icount_decr
.u16
.low
= decr
;
1365 cpu
->icount_extra
= count
;
1367 ret
= cpu_exec(env
);
1368 #ifdef CONFIG_PROFILER
1369 qemu_time
+= profile_getclock() - ti
;
1372 /* Fold pending instructions back into the
1373 instruction counter, and clear the interrupt flag. */
1374 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1375 + cpu
->icount_extra
);
1376 cpu
->icount_decr
.u32
= 0;
1377 cpu
->icount_extra
= 0;
1382 static void tcg_exec_all(void)
1386 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1387 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
1389 if (next_cpu
== NULL
) {
1390 next_cpu
= first_cpu
;
1392 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1393 CPUState
*cpu
= next_cpu
;
1394 CPUArchState
*env
= cpu
->env_ptr
;
1396 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1397 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1399 if (cpu_can_run(cpu
)) {
1400 r
= tcg_cpu_exec(env
);
1401 if (r
== EXCP_DEBUG
) {
1402 cpu_handle_guest_debug(cpu
);
1405 } else if (cpu
->stop
|| cpu
->stopped
) {
1412 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1414 /* XXX: implement xxx_cpu_list for targets that still miss it */
1415 #if defined(cpu_list)
1416 cpu_list(f
, cpu_fprintf
);
1420 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1422 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1427 #if defined(TARGET_I386)
1428 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1429 CPUX86State
*env
= &x86_cpu
->env
;
1430 #elif defined(TARGET_PPC)
1431 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1432 CPUPPCState
*env
= &ppc_cpu
->env
;
1433 #elif defined(TARGET_SPARC)
1434 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1435 CPUSPARCState
*env
= &sparc_cpu
->env
;
1436 #elif defined(TARGET_MIPS)
1437 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1438 CPUMIPSState
*env
= &mips_cpu
->env
;
1439 #elif defined(TARGET_TRICORE)
1440 TriCoreCPU
*tricore_cpu
= TRICORE_CPU(cpu
);
1441 CPUTriCoreState
*env
= &tricore_cpu
->env
;
1444 cpu_synchronize_state(cpu
);
1446 info
= g_malloc0(sizeof(*info
));
1447 info
->value
= g_malloc0(sizeof(*info
->value
));
1448 info
->value
->CPU
= cpu
->cpu_index
;
1449 info
->value
->current
= (cpu
== first_cpu
);
1450 info
->value
->halted
= cpu
->halted
;
1451 info
->value
->thread_id
= cpu
->thread_id
;
1452 #if defined(TARGET_I386)
1453 info
->value
->has_pc
= true;
1454 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1455 #elif defined(TARGET_PPC)
1456 info
->value
->has_nip
= true;
1457 info
->value
->nip
= env
->nip
;
1458 #elif defined(TARGET_SPARC)
1459 info
->value
->has_pc
= true;
1460 info
->value
->pc
= env
->pc
;
1461 info
->value
->has_npc
= true;
1462 info
->value
->npc
= env
->npc
;
1463 #elif defined(TARGET_MIPS)
1464 info
->value
->has_PC
= true;
1465 info
->value
->PC
= env
->active_tc
.PC
;
1466 #elif defined(TARGET_TRICORE)
1467 info
->value
->has_PC
= true;
1468 info
->value
->PC
= env
->PC
;
1471 /* XXX: waiting for the qapi to support GSList */
1473 head
= cur_item
= info
;
1475 cur_item
->next
= info
;
1483 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1484 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1495 cpu
= qemu_get_cpu(cpu_index
);
1497 error_set(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1502 f
= fopen(filename
, "wb");
1504 error_setg_file_open(errp
, errno
, filename
);
1512 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1513 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"specified", addr
);
1516 if (fwrite(buf
, 1, l
, f
) != l
) {
1517 error_set(errp
, QERR_IO_ERROR
);
1528 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1535 f
= fopen(filename
, "wb");
1537 error_setg_file_open(errp
, errno
, filename
);
1545 cpu_physical_memory_read(addr
, buf
, l
);
1546 if (fwrite(buf
, 1, l
, f
) != l
) {
1547 error_set(errp
, QERR_IO_ERROR
);
1558 void qmp_inject_nmi(Error
**errp
)
1560 #if defined(TARGET_I386)
1564 X86CPU
*cpu
= X86_CPU(cs
);
1566 if (!cpu
->apic_state
) {
1567 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1569 apic_deliver_nmi(cpu
->apic_state
);
1573 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
1577 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
1583 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
1584 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
1585 if (icount_align_option
) {
1586 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
1587 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
1589 cpu_fprintf(f
, "Max guest delay NA\n");
1590 cpu_fprintf(f
, "Max guest advance NA\n");