4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "sysemu/sysemu.h"
31 #include "exec/gdbstub.h"
32 #include "sysemu/dma.h"
33 #include "sysemu/kvm.h"
34 #include "qmp-commands.h"
36 #include "qemu/thread.h"
37 #include "sysemu/cpus.h"
38 #include "sysemu/qtest.h"
39 #include "qemu/main-loop.h"
40 #include "qemu/bitmap.h"
41 #include "qemu/seqlock.h"
42 #include "qapi-event.h"
46 #include "qemu/compatfd.h"
51 #include <sys/prctl.h>
54 #define PR_MCE_KILL 33
57 #ifndef PR_MCE_KILL_SET
58 #define PR_MCE_KILL_SET 1
61 #ifndef PR_MCE_KILL_EARLY
62 #define PR_MCE_KILL_EARLY 1
65 #endif /* CONFIG_LINUX */
67 static CPUState
*next_cpu
;
71 bool cpu_is_stopped(CPUState
*cpu
)
73 return cpu
->stopped
|| !runstate_is_running();
76 static bool cpu_thread_is_idle(CPUState
*cpu
)
78 if (cpu
->stop
|| cpu
->queued_work_first
) {
81 if (cpu_is_stopped(cpu
)) {
84 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
85 kvm_halt_in_kernel()) {
91 static bool all_cpu_threads_idle(void)
96 if (!cpu_thread_is_idle(cpu
)) {
103 /***********************************************************/
104 /* guest cycle counter */
106 /* Protected by TimersState seqlock */
108 static bool icount_sleep
= true;
109 static int64_t vm_clock_warp_start
= -1;
110 /* Conversion factor from emulated instructions to virtual clock ticks. */
111 static int icount_time_shift
;
112 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
113 #define MAX_ICOUNT_SHIFT 10
115 static QEMUTimer
*icount_rt_timer
;
116 static QEMUTimer
*icount_vm_timer
;
117 static QEMUTimer
*icount_warp_timer
;
119 typedef struct TimersState
{
120 /* Protected by BQL. */
121 int64_t cpu_ticks_prev
;
122 int64_t cpu_ticks_offset
;
124 /* cpu_clock_offset can be read out of BQL, so protect it with
127 QemuSeqLock vm_clock_seqlock
;
128 int64_t cpu_clock_offset
;
129 int32_t cpu_ticks_enabled
;
132 /* Compensate for varying guest execution speed. */
133 int64_t qemu_icount_bias
;
134 /* Only written by TCG thread */
138 static TimersState timers_state
;
140 int64_t cpu_get_icount_raw(void)
143 CPUState
*cpu
= current_cpu
;
145 icount
= timers_state
.qemu_icount
;
147 if (!cpu_can_do_io(cpu
)) {
148 fprintf(stderr
, "Bad icount read\n");
151 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
156 /* Return the virtual CPU time, based on the instruction counter. */
157 static int64_t cpu_get_icount_locked(void)
159 int64_t icount
= cpu_get_icount_raw();
160 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
163 int64_t cpu_get_icount(void)
169 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
170 icount
= cpu_get_icount_locked();
171 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
176 int64_t cpu_icount_to_ns(int64_t icount
)
178 return icount
<< icount_time_shift
;
181 /* return the host CPU cycle counter and handle stop/restart */
182 /* Caller must hold the BQL */
183 int64_t cpu_get_ticks(void)
188 return cpu_get_icount();
191 ticks
= timers_state
.cpu_ticks_offset
;
192 if (timers_state
.cpu_ticks_enabled
) {
193 ticks
+= cpu_get_real_ticks();
196 if (timers_state
.cpu_ticks_prev
> ticks
) {
197 /* Note: non increasing ticks may happen if the host uses
199 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
200 ticks
= timers_state
.cpu_ticks_prev
;
203 timers_state
.cpu_ticks_prev
= ticks
;
207 static int64_t cpu_get_clock_locked(void)
211 ticks
= timers_state
.cpu_clock_offset
;
212 if (timers_state
.cpu_ticks_enabled
) {
213 ticks
+= get_clock();
219 /* return the host CPU monotonic timer and handle stop/restart */
220 int64_t cpu_get_clock(void)
226 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
227 ti
= cpu_get_clock_locked();
228 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
233 /* enable cpu_get_ticks()
234 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
236 void cpu_enable_ticks(void)
238 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
239 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
240 if (!timers_state
.cpu_ticks_enabled
) {
241 timers_state
.cpu_ticks_offset
-= cpu_get_real_ticks();
242 timers_state
.cpu_clock_offset
-= get_clock();
243 timers_state
.cpu_ticks_enabled
= 1;
245 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
248 /* disable cpu_get_ticks() : the clock is stopped. You must not call
249 * cpu_get_ticks() after that.
250 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
252 void cpu_disable_ticks(void)
254 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
255 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
256 if (timers_state
.cpu_ticks_enabled
) {
257 timers_state
.cpu_ticks_offset
+= cpu_get_real_ticks();
258 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
259 timers_state
.cpu_ticks_enabled
= 0;
261 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
264 /* Correlation between real and virtual time is always going to be
265 fairly approximate, so ignore small variation.
266 When the guest is idle real and virtual time will be aligned in
268 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
270 static void icount_adjust(void)
276 /* Protected by TimersState mutex. */
277 static int64_t last_delta
;
279 /* If the VM is not running, then do nothing. */
280 if (!runstate_is_running()) {
284 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
285 cur_time
= cpu_get_clock_locked();
286 cur_icount
= cpu_get_icount_locked();
288 delta
= cur_icount
- cur_time
;
289 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
291 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
292 && icount_time_shift
> 0) {
293 /* The guest is getting too far ahead. Slow time down. */
297 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
298 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
299 /* The guest is getting too far behind. Speed time up. */
303 timers_state
.qemu_icount_bias
= cur_icount
304 - (timers_state
.qemu_icount
<< icount_time_shift
);
305 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
308 static void icount_adjust_rt(void *opaque
)
310 timer_mod(icount_rt_timer
,
311 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
315 static void icount_adjust_vm(void *opaque
)
317 timer_mod(icount_vm_timer
,
318 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
319 get_ticks_per_sec() / 10);
323 static int64_t qemu_icount_round(int64_t count
)
325 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
328 static void icount_warp_rt(void *opaque
)
330 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
331 * changes from -1 to another value, so the race here is okay.
333 if (atomic_read(&vm_clock_warp_start
) == -1) {
337 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
338 if (runstate_is_running()) {
339 int64_t clock
= cpu_get_clock_locked();
342 warp_delta
= clock
- vm_clock_warp_start
;
343 if (use_icount
== 2) {
345 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
346 * far ahead of real time.
348 int64_t cur_icount
= cpu_get_icount_locked();
349 int64_t delta
= clock
- cur_icount
;
350 warp_delta
= MIN(warp_delta
, delta
);
352 timers_state
.qemu_icount_bias
+= warp_delta
;
354 vm_clock_warp_start
= -1;
355 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
357 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
358 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
362 void qtest_clock_warp(int64_t dest
)
364 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
365 AioContext
*aio_context
;
366 assert(qtest_enabled());
367 aio_context
= qemu_get_aio_context();
368 while (clock
< dest
) {
369 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
370 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
372 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
373 timers_state
.qemu_icount_bias
+= warp
;
374 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
376 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
377 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
378 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
380 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
383 void qemu_clock_warp(QEMUClockType type
)
389 * There are too many global variables to make the "warp" behavior
390 * applicable to other clocks. But a clock argument removes the
391 * need for if statements all over the place.
393 if (type
!= QEMU_CLOCK_VIRTUAL
|| !use_icount
) {
399 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
400 * This ensures that the deadline for the timer is computed correctly
402 * This also makes sure that the insn counter is synchronized before
403 * the CPU starts running, in case the CPU is woken by an event other
404 * than the earliest QEMU_CLOCK_VIRTUAL timer.
406 icount_warp_rt(NULL
);
407 timer_del(icount_warp_timer
);
409 if (!all_cpu_threads_idle()) {
413 if (qtest_enabled()) {
414 /* When testing, qtest commands advance icount. */
418 /* We want to use the earliest deadline from ALL vm_clocks */
419 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
420 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
427 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
428 * sleep. Otherwise, the CPU might be waiting for a future timer
429 * interrupt to wake it up, but the interrupt never comes because
430 * the vCPU isn't running any insns and thus doesn't advance the
431 * QEMU_CLOCK_VIRTUAL.
435 * We never let VCPUs sleep in no sleep icount mode.
436 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
437 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
438 * It is useful when we want a deterministic execution time,
439 * isolated from host latencies.
441 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
442 timers_state
.qemu_icount_bias
+= deadline
;
443 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
444 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
447 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
448 * "real" time, (related to the time left until the next event) has
449 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
450 * This avoids that the warps are visible externally; for example,
451 * you will not be sending network packets continuously instead of
454 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
455 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
456 vm_clock_warp_start
= clock
;
458 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
459 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
461 } else if (deadline
== 0) {
462 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
466 static bool icount_state_needed(void *opaque
)
472 * This is a subsection for icount migration.
474 static const VMStateDescription icount_vmstate_timers
= {
475 .name
= "timer/icount",
477 .minimum_version_id
= 1,
478 .fields
= (VMStateField
[]) {
479 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
480 VMSTATE_INT64(qemu_icount
, TimersState
),
481 VMSTATE_END_OF_LIST()
485 static const VMStateDescription vmstate_timers
= {
488 .minimum_version_id
= 1,
489 .fields
= (VMStateField
[]) {
490 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
491 VMSTATE_INT64(dummy
, TimersState
),
492 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
493 VMSTATE_END_OF_LIST()
495 .subsections
= (VMStateSubsection
[]) {
497 .vmsd
= &icount_vmstate_timers
,
498 .needed
= icount_state_needed
,
505 void cpu_ticks_init(void)
507 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
508 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
511 void configure_icount(QemuOpts
*opts
, Error
**errp
)
514 char *rem_str
= NULL
;
516 option
= qemu_opt_get(opts
, "shift");
518 if (qemu_opt_get(opts
, "align") != NULL
) {
519 error_setg(errp
, "Please specify shift option when using align");
524 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
525 icount_warp_rt
, NULL
);
527 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
528 if (strcmp(option
, "auto") != 0) {
530 icount_time_shift
= strtol(option
, &rem_str
, 0);
531 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
532 error_setg(errp
, "icount: Invalid shift value");
536 } else if (icount_align_option
) {
537 error_setg(errp
, "shift=auto and align=on are incompatible");
542 /* 125MIPS seems a reasonable initial guess at the guest speed.
543 It will be corrected fairly quickly anyway. */
544 icount_time_shift
= 3;
546 /* Have both realtime and virtual time triggers for speed adjustment.
547 The realtime trigger catches emulated time passing too slowly,
548 the virtual time trigger catches emulated time passing too fast.
549 Realtime triggers occur even when idle, so use them less frequently
551 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
552 icount_adjust_rt
, NULL
);
553 timer_mod(icount_rt_timer
,
554 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
555 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
556 icount_adjust_vm
, NULL
);
557 timer_mod(icount_vm_timer
,
558 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
559 get_ticks_per_sec() / 10);
562 /***********************************************************/
563 void hw_error(const char *fmt
, ...)
569 fprintf(stderr
, "qemu: hardware error: ");
570 vfprintf(stderr
, fmt
, ap
);
571 fprintf(stderr
, "\n");
573 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
574 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
580 void cpu_synchronize_all_states(void)
585 cpu_synchronize_state(cpu
);
589 void cpu_synchronize_all_post_reset(void)
594 cpu_synchronize_post_reset(cpu
);
598 void cpu_synchronize_all_post_init(void)
603 cpu_synchronize_post_init(cpu
);
607 void cpu_clean_all_dirty(void)
612 cpu_clean_state(cpu
);
616 static int do_vm_stop(RunState state
)
620 if (runstate_is_running()) {
624 vm_state_notify(0, state
);
625 qapi_event_send_stop(&error_abort
);
629 ret
= bdrv_flush_all();
634 static bool cpu_can_run(CPUState
*cpu
)
639 if (cpu_is_stopped(cpu
)) {
645 static void cpu_handle_guest_debug(CPUState
*cpu
)
647 gdb_set_stop_cpu(cpu
);
648 qemu_system_debug_request();
652 static void cpu_signal(int sig
)
655 cpu_exit(current_cpu
);
661 static void sigbus_reraise(void)
664 struct sigaction action
;
666 memset(&action
, 0, sizeof(action
));
667 action
.sa_handler
= SIG_DFL
;
668 if (!sigaction(SIGBUS
, &action
, NULL
)) {
671 sigaddset(&set
, SIGBUS
);
672 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
674 perror("Failed to re-raise SIGBUS!\n");
678 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
681 if (kvm_on_sigbus(siginfo
->ssi_code
,
682 (void *)(intptr_t)siginfo
->ssi_addr
)) {
687 static void qemu_init_sigbus(void)
689 struct sigaction action
;
691 memset(&action
, 0, sizeof(action
));
692 action
.sa_flags
= SA_SIGINFO
;
693 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
694 sigaction(SIGBUS
, &action
, NULL
);
696 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
699 static void qemu_kvm_eat_signals(CPUState
*cpu
)
701 struct timespec ts
= { 0, 0 };
707 sigemptyset(&waitset
);
708 sigaddset(&waitset
, SIG_IPI
);
709 sigaddset(&waitset
, SIGBUS
);
712 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
713 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
714 perror("sigtimedwait");
720 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
728 r
= sigpending(&chkset
);
730 perror("sigpending");
733 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
736 #else /* !CONFIG_LINUX */
738 static void qemu_init_sigbus(void)
742 static void qemu_kvm_eat_signals(CPUState
*cpu
)
745 #endif /* !CONFIG_LINUX */
748 static void dummy_signal(int sig
)
752 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
756 struct sigaction sigact
;
758 memset(&sigact
, 0, sizeof(sigact
));
759 sigact
.sa_handler
= dummy_signal
;
760 sigaction(SIG_IPI
, &sigact
, NULL
);
762 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
763 sigdelset(&set
, SIG_IPI
);
764 sigdelset(&set
, SIGBUS
);
765 r
= kvm_set_signal_mask(cpu
, &set
);
767 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
772 static void qemu_tcg_init_cpu_signals(void)
775 struct sigaction sigact
;
777 memset(&sigact
, 0, sizeof(sigact
));
778 sigact
.sa_handler
= cpu_signal
;
779 sigaction(SIG_IPI
, &sigact
, NULL
);
782 sigaddset(&set
, SIG_IPI
);
783 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
787 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
792 static void qemu_tcg_init_cpu_signals(void)
797 static QemuMutex qemu_global_mutex
;
798 static QemuCond qemu_io_proceeded_cond
;
799 static unsigned iothread_requesting_mutex
;
801 static QemuThread io_thread
;
803 static QemuThread
*tcg_cpu_thread
;
804 static QemuCond
*tcg_halt_cond
;
807 static QemuCond qemu_cpu_cond
;
809 static QemuCond qemu_pause_cond
;
810 static QemuCond qemu_work_cond
;
812 void qemu_init_cpu_loop(void)
815 qemu_cond_init(&qemu_cpu_cond
);
816 qemu_cond_init(&qemu_pause_cond
);
817 qemu_cond_init(&qemu_work_cond
);
818 qemu_cond_init(&qemu_io_proceeded_cond
);
819 qemu_mutex_init(&qemu_global_mutex
);
821 qemu_thread_get_self(&io_thread
);
824 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
826 struct qemu_work_item wi
;
828 if (qemu_cpu_is_self(cpu
)) {
836 if (cpu
->queued_work_first
== NULL
) {
837 cpu
->queued_work_first
= &wi
;
839 cpu
->queued_work_last
->next
= &wi
;
841 cpu
->queued_work_last
= &wi
;
847 CPUState
*self_cpu
= current_cpu
;
849 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
850 current_cpu
= self_cpu
;
854 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
856 struct qemu_work_item
*wi
;
858 if (qemu_cpu_is_self(cpu
)) {
863 wi
= g_malloc0(sizeof(struct qemu_work_item
));
867 if (cpu
->queued_work_first
== NULL
) {
868 cpu
->queued_work_first
= wi
;
870 cpu
->queued_work_last
->next
= wi
;
872 cpu
->queued_work_last
= wi
;
879 static void flush_queued_work(CPUState
*cpu
)
881 struct qemu_work_item
*wi
;
883 if (cpu
->queued_work_first
== NULL
) {
887 while ((wi
= cpu
->queued_work_first
)) {
888 cpu
->queued_work_first
= wi
->next
;
895 cpu
->queued_work_last
= NULL
;
896 qemu_cond_broadcast(&qemu_work_cond
);
899 static void qemu_wait_io_event_common(CPUState
*cpu
)
904 qemu_cond_signal(&qemu_pause_cond
);
906 flush_queued_work(cpu
);
907 cpu
->thread_kicked
= false;
910 static void qemu_tcg_wait_io_event(void)
914 while (all_cpu_threads_idle()) {
915 /* Start accounting real time to the virtual clock if the CPUs
917 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
918 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
921 while (iothread_requesting_mutex
) {
922 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
926 qemu_wait_io_event_common(cpu
);
930 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
932 while (cpu_thread_is_idle(cpu
)) {
933 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
936 qemu_kvm_eat_signals(cpu
);
937 qemu_wait_io_event_common(cpu
);
940 static void *qemu_kvm_cpu_thread_fn(void *arg
)
945 qemu_mutex_lock(&qemu_global_mutex
);
946 qemu_thread_get_self(cpu
->thread
);
947 cpu
->thread_id
= qemu_get_thread_id();
951 r
= kvm_init_vcpu(cpu
);
953 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
957 qemu_kvm_init_cpu_signals(cpu
);
959 /* signal CPU creation */
961 qemu_cond_signal(&qemu_cpu_cond
);
964 if (cpu_can_run(cpu
)) {
965 r
= kvm_cpu_exec(cpu
);
966 if (r
== EXCP_DEBUG
) {
967 cpu_handle_guest_debug(cpu
);
970 qemu_kvm_wait_io_event(cpu
);
976 static void *qemu_dummy_cpu_thread_fn(void *arg
)
979 fprintf(stderr
, "qtest is not supported under Windows\n");
986 qemu_mutex_lock_iothread();
987 qemu_thread_get_self(cpu
->thread
);
988 cpu
->thread_id
= qemu_get_thread_id();
991 sigemptyset(&waitset
);
992 sigaddset(&waitset
, SIG_IPI
);
994 /* signal CPU creation */
996 qemu_cond_signal(&qemu_cpu_cond
);
1001 qemu_mutex_unlock_iothread();
1004 r
= sigwait(&waitset
, &sig
);
1005 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1010 qemu_mutex_lock_iothread();
1012 qemu_wait_io_event_common(cpu
);
1019 static void tcg_exec_all(void);
1021 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1023 CPUState
*cpu
= arg
;
1025 qemu_tcg_init_cpu_signals();
1026 qemu_thread_get_self(cpu
->thread
);
1028 qemu_mutex_lock(&qemu_global_mutex
);
1030 cpu
->thread_id
= qemu_get_thread_id();
1031 cpu
->created
= true;
1034 qemu_cond_signal(&qemu_cpu_cond
);
1036 /* wait for initial kick-off after machine start */
1037 while (first_cpu
->stopped
) {
1038 qemu_cond_wait(tcg_halt_cond
, &qemu_global_mutex
);
1040 /* process any pending work */
1042 qemu_wait_io_event_common(cpu
);
1046 /* process any pending work */
1053 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1055 if (deadline
== 0) {
1056 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1059 qemu_tcg_wait_io_event();
1065 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1070 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1072 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1076 if (!qemu_cpu_is_self(cpu
)) {
1079 if (SuspendThread(cpu
->hThread
) == (DWORD
)-1) {
1080 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1085 /* On multi-core systems, we are not sure that the thread is actually
1086 * suspended until we can get the context.
1088 tcgContext
.ContextFlags
= CONTEXT_CONTROL
;
1089 while (GetThreadContext(cpu
->hThread
, &tcgContext
) != 0) {
1095 if (ResumeThread(cpu
->hThread
) == (DWORD
)-1) {
1096 fprintf(stderr
, "qemu:%s: GetLastError:%lu\n", __func__
,
1104 void qemu_cpu_kick(CPUState
*cpu
)
1106 qemu_cond_broadcast(cpu
->halt_cond
);
1107 if (!tcg_enabled() && !cpu
->thread_kicked
) {
1108 qemu_cpu_kick_thread(cpu
);
1109 cpu
->thread_kicked
= true;
1113 void qemu_cpu_kick_self(void)
1116 assert(current_cpu
);
1118 if (!current_cpu
->thread_kicked
) {
1119 qemu_cpu_kick_thread(current_cpu
);
1120 current_cpu
->thread_kicked
= true;
1127 bool qemu_cpu_is_self(CPUState
*cpu
)
1129 return qemu_thread_is_self(cpu
->thread
);
1132 bool qemu_in_vcpu_thread(void)
1134 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
1137 void qemu_mutex_lock_iothread(void)
1139 atomic_inc(&iothread_requesting_mutex
);
1140 if (!tcg_enabled() || !first_cpu
|| !first_cpu
->thread
) {
1141 qemu_mutex_lock(&qemu_global_mutex
);
1142 atomic_dec(&iothread_requesting_mutex
);
1144 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1145 qemu_cpu_kick_thread(first_cpu
);
1146 qemu_mutex_lock(&qemu_global_mutex
);
1148 atomic_dec(&iothread_requesting_mutex
);
1149 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
1153 void qemu_mutex_unlock_iothread(void)
1155 qemu_mutex_unlock(&qemu_global_mutex
);
1158 static int all_vcpus_paused(void)
1163 if (!cpu
->stopped
) {
1171 void pause_all_vcpus(void)
1175 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1181 if (qemu_in_vcpu_thread()) {
1183 if (!kvm_enabled()) {
1186 cpu
->stopped
= true;
1192 while (!all_vcpus_paused()) {
1193 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1200 void cpu_resume(CPUState
*cpu
)
1203 cpu
->stopped
= false;
1207 void resume_all_vcpus(void)
1211 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1217 /* For temporary buffers for forming a name */
1218 #define VCPU_THREAD_NAME_SIZE 16
1220 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1222 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1224 tcg_cpu_address_space_init(cpu
, cpu
->as
);
1226 /* share a single thread for all cpus with TCG */
1227 if (!tcg_cpu_thread
) {
1228 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1229 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1230 qemu_cond_init(cpu
->halt_cond
);
1231 tcg_halt_cond
= cpu
->halt_cond
;
1232 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1234 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1235 cpu
, QEMU_THREAD_JOINABLE
);
1237 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1239 while (!cpu
->created
) {
1240 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1242 tcg_cpu_thread
= cpu
->thread
;
1244 cpu
->thread
= tcg_cpu_thread
;
1245 cpu
->halt_cond
= tcg_halt_cond
;
1249 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1251 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1253 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1254 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1255 qemu_cond_init(cpu
->halt_cond
);
1256 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1258 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1259 cpu
, QEMU_THREAD_JOINABLE
);
1260 while (!cpu
->created
) {
1261 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1265 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1267 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1269 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1270 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1271 qemu_cond_init(cpu
->halt_cond
);
1272 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1274 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1275 QEMU_THREAD_JOINABLE
);
1276 while (!cpu
->created
) {
1277 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1281 void qemu_init_vcpu(CPUState
*cpu
)
1283 cpu
->nr_cores
= smp_cores
;
1284 cpu
->nr_threads
= smp_threads
;
1285 cpu
->stopped
= true;
1286 if (kvm_enabled()) {
1287 qemu_kvm_start_vcpu(cpu
);
1288 } else if (tcg_enabled()) {
1289 qemu_tcg_init_vcpu(cpu
);
1291 qemu_dummy_start_vcpu(cpu
);
1295 void cpu_stop_current(void)
1298 current_cpu
->stop
= false;
1299 current_cpu
->stopped
= true;
1300 cpu_exit(current_cpu
);
1301 qemu_cond_signal(&qemu_pause_cond
);
1305 int vm_stop(RunState state
)
1307 if (qemu_in_vcpu_thread()) {
1308 qemu_system_vmstop_request_prepare();
1309 qemu_system_vmstop_request(state
);
1311 * FIXME: should not return to device code in case
1312 * vm_stop() has been requested.
1318 return do_vm_stop(state
);
1321 /* does a state transition even if the VM is already stopped,
1322 current state is forgotten forever */
1323 int vm_stop_force_state(RunState state
)
1325 if (runstate_is_running()) {
1326 return vm_stop(state
);
1328 runstate_set(state
);
1329 /* Make sure to return an error if the flush in a previous vm_stop()
1331 return bdrv_flush_all();
1335 static int tcg_cpu_exec(CPUArchState
*env
)
1337 CPUState
*cpu
= ENV_GET_CPU(env
);
1339 #ifdef CONFIG_PROFILER
1343 #ifdef CONFIG_PROFILER
1344 ti
= profile_getclock();
1350 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1351 + cpu
->icount_extra
);
1352 cpu
->icount_decr
.u16
.low
= 0;
1353 cpu
->icount_extra
= 0;
1354 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1356 /* Maintain prior (possibly buggy) behaviour where if no deadline
1357 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1358 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1361 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1362 deadline
= INT32_MAX
;
1365 count
= qemu_icount_round(deadline
);
1366 timers_state
.qemu_icount
+= count
;
1367 decr
= (count
> 0xffff) ? 0xffff : count
;
1369 cpu
->icount_decr
.u16
.low
= decr
;
1370 cpu
->icount_extra
= count
;
1372 ret
= cpu_exec(env
);
1373 #ifdef CONFIG_PROFILER
1374 tcg_time
+= profile_getclock() - ti
;
1377 /* Fold pending instructions back into the
1378 instruction counter, and clear the interrupt flag. */
1379 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1380 + cpu
->icount_extra
);
1381 cpu
->icount_decr
.u32
= 0;
1382 cpu
->icount_extra
= 0;
1387 static void tcg_exec_all(void)
1391 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1392 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
1394 if (next_cpu
== NULL
) {
1395 next_cpu
= first_cpu
;
1397 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1398 CPUState
*cpu
= next_cpu
;
1399 CPUArchState
*env
= cpu
->env_ptr
;
1401 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1402 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1404 if (cpu_can_run(cpu
)) {
1405 r
= tcg_cpu_exec(env
);
1406 if (r
== EXCP_DEBUG
) {
1407 cpu_handle_guest_debug(cpu
);
1410 } else if (cpu
->stop
|| cpu
->stopped
) {
1417 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1419 /* XXX: implement xxx_cpu_list for targets that still miss it */
1420 #if defined(cpu_list)
1421 cpu_list(f
, cpu_fprintf
);
1425 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1427 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1432 #if defined(TARGET_I386)
1433 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1434 CPUX86State
*env
= &x86_cpu
->env
;
1435 #elif defined(TARGET_PPC)
1436 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1437 CPUPPCState
*env
= &ppc_cpu
->env
;
1438 #elif defined(TARGET_SPARC)
1439 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1440 CPUSPARCState
*env
= &sparc_cpu
->env
;
1441 #elif defined(TARGET_MIPS)
1442 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1443 CPUMIPSState
*env
= &mips_cpu
->env
;
1444 #elif defined(TARGET_TRICORE)
1445 TriCoreCPU
*tricore_cpu
= TRICORE_CPU(cpu
);
1446 CPUTriCoreState
*env
= &tricore_cpu
->env
;
1449 cpu_synchronize_state(cpu
);
1451 info
= g_malloc0(sizeof(*info
));
1452 info
->value
= g_malloc0(sizeof(*info
->value
));
1453 info
->value
->CPU
= cpu
->cpu_index
;
1454 info
->value
->current
= (cpu
== first_cpu
);
1455 info
->value
->halted
= cpu
->halted
;
1456 info
->value
->qom_path
= object_get_canonical_path(OBJECT(cpu
));
1457 info
->value
->thread_id
= cpu
->thread_id
;
1458 #if defined(TARGET_I386)
1459 info
->value
->has_pc
= true;
1460 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1461 #elif defined(TARGET_PPC)
1462 info
->value
->has_nip
= true;
1463 info
->value
->nip
= env
->nip
;
1464 #elif defined(TARGET_SPARC)
1465 info
->value
->has_pc
= true;
1466 info
->value
->pc
= env
->pc
;
1467 info
->value
->has_npc
= true;
1468 info
->value
->npc
= env
->npc
;
1469 #elif defined(TARGET_MIPS)
1470 info
->value
->has_PC
= true;
1471 info
->value
->PC
= env
->active_tc
.PC
;
1472 #elif defined(TARGET_TRICORE)
1473 info
->value
->has_PC
= true;
1474 info
->value
->PC
= env
->PC
;
1477 /* XXX: waiting for the qapi to support GSList */
1479 head
= cur_item
= info
;
1481 cur_item
->next
= info
;
1489 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1490 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1496 int64_t orig_addr
= addr
, orig_size
= size
;
1502 cpu
= qemu_get_cpu(cpu_index
);
1504 error_set(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1509 f
= fopen(filename
, "wb");
1511 error_setg_file_open(errp
, errno
, filename
);
1519 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1520 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"/size %" PRId64
1521 " specified", orig_addr
, orig_size
);
1524 if (fwrite(buf
, 1, l
, f
) != l
) {
1525 error_set(errp
, QERR_IO_ERROR
);
1536 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1543 f
= fopen(filename
, "wb");
1545 error_setg_file_open(errp
, errno
, filename
);
1553 cpu_physical_memory_read(addr
, buf
, l
);
1554 if (fwrite(buf
, 1, l
, f
) != l
) {
1555 error_set(errp
, QERR_IO_ERROR
);
1566 void qmp_inject_nmi(Error
**errp
)
1568 #if defined(TARGET_I386)
1572 X86CPU
*cpu
= X86_CPU(cs
);
1574 if (!cpu
->apic_state
) {
1575 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1577 apic_deliver_nmi(cpu
->apic_state
);
1581 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
1585 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
1591 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
1592 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
1593 if (icount_align_option
) {
1594 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
1595 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
1597 cpu_fprintf(f
, "Max guest delay NA\n");
1598 cpu_fprintf(f
, "Max guest advance NA\n");