4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
28 #include "monitor/monitor.h"
29 #include "qapi/qmp/qerror.h"
30 #include "qemu/error-report.h"
31 #include "sysemu/sysemu.h"
32 #include "exec/gdbstub.h"
33 #include "sysemu/dma.h"
34 #include "sysemu/kvm.h"
35 #include "qmp-commands.h"
37 #include "qemu/thread.h"
38 #include "sysemu/cpus.h"
39 #include "sysemu/qtest.h"
40 #include "qemu/main-loop.h"
41 #include "qemu/bitmap.h"
42 #include "qemu/seqlock.h"
43 #include "qapi-event.h"
47 #include "qemu/compatfd.h"
52 #include <sys/prctl.h>
55 #define PR_MCE_KILL 33
58 #ifndef PR_MCE_KILL_SET
59 #define PR_MCE_KILL_SET 1
62 #ifndef PR_MCE_KILL_EARLY
63 #define PR_MCE_KILL_EARLY 1
66 #endif /* CONFIG_LINUX */
68 static CPUState
*next_cpu
;
72 /* vcpu throttling controls */
73 static QEMUTimer
*throttle_timer
;
74 static unsigned int throttle_percentage
;
76 #define CPU_THROTTLE_PCT_MIN 1
77 #define CPU_THROTTLE_PCT_MAX 99
78 #define CPU_THROTTLE_TIMESLICE_NS 10000000
80 bool cpu_is_stopped(CPUState
*cpu
)
82 return cpu
->stopped
|| !runstate_is_running();
85 static bool cpu_thread_is_idle(CPUState
*cpu
)
87 if (cpu
->stop
|| cpu
->queued_work_first
) {
90 if (cpu_is_stopped(cpu
)) {
93 if (!cpu
->halted
|| cpu_has_work(cpu
) ||
94 kvm_halt_in_kernel()) {
100 static bool all_cpu_threads_idle(void)
105 if (!cpu_thread_is_idle(cpu
)) {
112 /***********************************************************/
113 /* guest cycle counter */
115 /* Protected by TimersState seqlock */
117 static bool icount_sleep
= true;
118 static int64_t vm_clock_warp_start
= -1;
119 /* Conversion factor from emulated instructions to virtual clock ticks. */
120 static int icount_time_shift
;
121 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
122 #define MAX_ICOUNT_SHIFT 10
124 static QEMUTimer
*icount_rt_timer
;
125 static QEMUTimer
*icount_vm_timer
;
126 static QEMUTimer
*icount_warp_timer
;
128 typedef struct TimersState
{
129 /* Protected by BQL. */
130 int64_t cpu_ticks_prev
;
131 int64_t cpu_ticks_offset
;
133 /* cpu_clock_offset can be read out of BQL, so protect it with
136 QemuSeqLock vm_clock_seqlock
;
137 int64_t cpu_clock_offset
;
138 int32_t cpu_ticks_enabled
;
141 /* Compensate for varying guest execution speed. */
142 int64_t qemu_icount_bias
;
143 /* Only written by TCG thread */
147 static TimersState timers_state
;
149 int64_t cpu_get_icount_raw(void)
152 CPUState
*cpu
= current_cpu
;
154 icount
= timers_state
.qemu_icount
;
156 if (!cpu
->can_do_io
) {
157 fprintf(stderr
, "Bad icount read\n");
160 icount
-= (cpu
->icount_decr
.u16
.low
+ cpu
->icount_extra
);
165 /* Return the virtual CPU time, based on the instruction counter. */
166 static int64_t cpu_get_icount_locked(void)
168 int64_t icount
= cpu_get_icount_raw();
169 return timers_state
.qemu_icount_bias
+ cpu_icount_to_ns(icount
);
172 int64_t cpu_get_icount(void)
178 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
179 icount
= cpu_get_icount_locked();
180 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
185 int64_t cpu_icount_to_ns(int64_t icount
)
187 return icount
<< icount_time_shift
;
190 /* return the host CPU cycle counter and handle stop/restart */
191 /* Caller must hold the BQL */
192 int64_t cpu_get_ticks(void)
197 return cpu_get_icount();
200 ticks
= timers_state
.cpu_ticks_offset
;
201 if (timers_state
.cpu_ticks_enabled
) {
202 ticks
+= cpu_get_host_ticks();
205 if (timers_state
.cpu_ticks_prev
> ticks
) {
206 /* Note: non increasing ticks may happen if the host uses
208 timers_state
.cpu_ticks_offset
+= timers_state
.cpu_ticks_prev
- ticks
;
209 ticks
= timers_state
.cpu_ticks_prev
;
212 timers_state
.cpu_ticks_prev
= ticks
;
216 static int64_t cpu_get_clock_locked(void)
220 ticks
= timers_state
.cpu_clock_offset
;
221 if (timers_state
.cpu_ticks_enabled
) {
222 ticks
+= get_clock();
228 /* return the host CPU monotonic timer and handle stop/restart */
229 int64_t cpu_get_clock(void)
235 start
= seqlock_read_begin(&timers_state
.vm_clock_seqlock
);
236 ti
= cpu_get_clock_locked();
237 } while (seqlock_read_retry(&timers_state
.vm_clock_seqlock
, start
));
242 /* enable cpu_get_ticks()
243 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
245 void cpu_enable_ticks(void)
247 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
248 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
249 if (!timers_state
.cpu_ticks_enabled
) {
250 timers_state
.cpu_ticks_offset
-= cpu_get_host_ticks();
251 timers_state
.cpu_clock_offset
-= get_clock();
252 timers_state
.cpu_ticks_enabled
= 1;
254 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
257 /* disable cpu_get_ticks() : the clock is stopped. You must not call
258 * cpu_get_ticks() after that.
259 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
261 void cpu_disable_ticks(void)
263 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
264 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
265 if (timers_state
.cpu_ticks_enabled
) {
266 timers_state
.cpu_ticks_offset
+= cpu_get_host_ticks();
267 timers_state
.cpu_clock_offset
= cpu_get_clock_locked();
268 timers_state
.cpu_ticks_enabled
= 0;
270 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
273 /* Correlation between real and virtual time is always going to be
274 fairly approximate, so ignore small variation.
275 When the guest is idle real and virtual time will be aligned in
277 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
279 static void icount_adjust(void)
285 /* Protected by TimersState mutex. */
286 static int64_t last_delta
;
288 /* If the VM is not running, then do nothing. */
289 if (!runstate_is_running()) {
293 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
294 cur_time
= cpu_get_clock_locked();
295 cur_icount
= cpu_get_icount_locked();
297 delta
= cur_icount
- cur_time
;
298 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
300 && last_delta
+ ICOUNT_WOBBLE
< delta
* 2
301 && icount_time_shift
> 0) {
302 /* The guest is getting too far ahead. Slow time down. */
306 && last_delta
- ICOUNT_WOBBLE
> delta
* 2
307 && icount_time_shift
< MAX_ICOUNT_SHIFT
) {
308 /* The guest is getting too far behind. Speed time up. */
312 timers_state
.qemu_icount_bias
= cur_icount
313 - (timers_state
.qemu_icount
<< icount_time_shift
);
314 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
317 static void icount_adjust_rt(void *opaque
)
319 timer_mod(icount_rt_timer
,
320 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
324 static void icount_adjust_vm(void *opaque
)
326 timer_mod(icount_vm_timer
,
327 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
328 get_ticks_per_sec() / 10);
332 static int64_t qemu_icount_round(int64_t count
)
334 return (count
+ (1 << icount_time_shift
) - 1) >> icount_time_shift
;
337 static void icount_warp_rt(void *opaque
)
339 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
340 * changes from -1 to another value, so the race here is okay.
342 if (atomic_read(&vm_clock_warp_start
) == -1) {
346 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
347 if (runstate_is_running()) {
348 int64_t clock
= cpu_get_clock_locked();
351 warp_delta
= clock
- vm_clock_warp_start
;
352 if (use_icount
== 2) {
354 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
355 * far ahead of real time.
357 int64_t cur_icount
= cpu_get_icount_locked();
358 int64_t delta
= clock
- cur_icount
;
359 warp_delta
= MIN(warp_delta
, delta
);
361 timers_state
.qemu_icount_bias
+= warp_delta
;
363 vm_clock_warp_start
= -1;
364 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
366 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL
)) {
367 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
371 void qtest_clock_warp(int64_t dest
)
373 int64_t clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
374 AioContext
*aio_context
;
375 assert(qtest_enabled());
376 aio_context
= qemu_get_aio_context();
377 while (clock
< dest
) {
378 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
379 int64_t warp
= qemu_soonest_timeout(dest
- clock
, deadline
);
381 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
382 timers_state
.qemu_icount_bias
+= warp
;
383 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
385 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL
);
386 timerlist_run_timers(aio_context
->tlg
.tl
[QEMU_CLOCK_VIRTUAL
]);
387 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
);
389 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
392 void qemu_clock_warp(QEMUClockType type
)
398 * There are too many global variables to make the "warp" behavior
399 * applicable to other clocks. But a clock argument removes the
400 * need for if statements all over the place.
402 if (type
!= QEMU_CLOCK_VIRTUAL
|| !use_icount
) {
408 * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
409 * This ensures that the deadline for the timer is computed correctly
411 * This also makes sure that the insn counter is synchronized before
412 * the CPU starts running, in case the CPU is woken by an event other
413 * than the earliest QEMU_CLOCK_VIRTUAL timer.
415 icount_warp_rt(NULL
);
416 timer_del(icount_warp_timer
);
418 if (!all_cpu_threads_idle()) {
422 if (qtest_enabled()) {
423 /* When testing, qtest commands advance icount. */
427 /* We want to use the earliest deadline from ALL vm_clocks */
428 clock
= qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
);
429 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
431 static bool notified
;
432 if (!icount_sleep
&& !notified
) {
433 error_report("WARNING: icount sleep disabled and no active timers");
441 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
442 * sleep. Otherwise, the CPU might be waiting for a future timer
443 * interrupt to wake it up, but the interrupt never comes because
444 * the vCPU isn't running any insns and thus doesn't advance the
445 * QEMU_CLOCK_VIRTUAL.
449 * We never let VCPUs sleep in no sleep icount mode.
450 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
451 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
452 * It is useful when we want a deterministic execution time,
453 * isolated from host latencies.
455 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
456 timers_state
.qemu_icount_bias
+= deadline
;
457 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
458 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
461 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
462 * "real" time, (related to the time left until the next event) has
463 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
464 * This avoids that the warps are visible externally; for example,
465 * you will not be sending network packets continuously instead of
468 seqlock_write_lock(&timers_state
.vm_clock_seqlock
);
469 if (vm_clock_warp_start
== -1 || vm_clock_warp_start
> clock
) {
470 vm_clock_warp_start
= clock
;
472 seqlock_write_unlock(&timers_state
.vm_clock_seqlock
);
473 timer_mod_anticipate(icount_warp_timer
, clock
+ deadline
);
475 } else if (deadline
== 0) {
476 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
480 static bool icount_state_needed(void *opaque
)
486 * This is a subsection for icount migration.
488 static const VMStateDescription icount_vmstate_timers
= {
489 .name
= "timer/icount",
491 .minimum_version_id
= 1,
492 .needed
= icount_state_needed
,
493 .fields
= (VMStateField
[]) {
494 VMSTATE_INT64(qemu_icount_bias
, TimersState
),
495 VMSTATE_INT64(qemu_icount
, TimersState
),
496 VMSTATE_END_OF_LIST()
500 static const VMStateDescription vmstate_timers
= {
503 .minimum_version_id
= 1,
504 .fields
= (VMStateField
[]) {
505 VMSTATE_INT64(cpu_ticks_offset
, TimersState
),
506 VMSTATE_INT64(dummy
, TimersState
),
507 VMSTATE_INT64_V(cpu_clock_offset
, TimersState
, 2),
508 VMSTATE_END_OF_LIST()
510 .subsections
= (const VMStateDescription
*[]) {
511 &icount_vmstate_timers
,
516 static void cpu_throttle_thread(void *opaque
)
518 CPUState
*cpu
= opaque
;
520 double throttle_ratio
;
523 if (!cpu_throttle_get_percentage()) {
527 pct
= (double)cpu_throttle_get_percentage()/100;
528 throttle_ratio
= pct
/ (1 - pct
);
529 sleeptime_ns
= (long)(throttle_ratio
* CPU_THROTTLE_TIMESLICE_NS
);
531 qemu_mutex_unlock_iothread();
532 atomic_set(&cpu
->throttle_thread_scheduled
, 0);
533 g_usleep(sleeptime_ns
/ 1000); /* Convert ns to us for usleep call */
534 qemu_mutex_lock_iothread();
537 static void cpu_throttle_timer_tick(void *opaque
)
542 /* Stop the timer if needed */
543 if (!cpu_throttle_get_percentage()) {
547 if (!atomic_xchg(&cpu
->throttle_thread_scheduled
, 1)) {
548 async_run_on_cpu(cpu
, cpu_throttle_thread
, cpu
);
552 pct
= (double)cpu_throttle_get_percentage()/100;
553 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
554 CPU_THROTTLE_TIMESLICE_NS
/ (1-pct
));
557 void cpu_throttle_set(int new_throttle_pct
)
559 /* Ensure throttle percentage is within valid range */
560 new_throttle_pct
= MIN(new_throttle_pct
, CPU_THROTTLE_PCT_MAX
);
561 new_throttle_pct
= MAX(new_throttle_pct
, CPU_THROTTLE_PCT_MIN
);
563 atomic_set(&throttle_percentage
, new_throttle_pct
);
565 timer_mod(throttle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT
) +
566 CPU_THROTTLE_TIMESLICE_NS
);
569 void cpu_throttle_stop(void)
571 atomic_set(&throttle_percentage
, 0);
/* Return true while a non-zero throttle percentage is in effect. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
579 int cpu_throttle_get_percentage(void)
581 return atomic_read(&throttle_percentage
);
584 void cpu_ticks_init(void)
586 seqlock_init(&timers_state
.vm_clock_seqlock
, NULL
);
587 vmstate_register(NULL
, 0, &vmstate_timers
, &timers_state
);
588 throttle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
589 cpu_throttle_timer_tick
, NULL
);
592 void configure_icount(QemuOpts
*opts
, Error
**errp
)
595 char *rem_str
= NULL
;
597 option
= qemu_opt_get(opts
, "shift");
599 if (qemu_opt_get(opts
, "align") != NULL
) {
600 error_setg(errp
, "Please specify shift option when using align");
605 icount_sleep
= qemu_opt_get_bool(opts
, "sleep", true);
607 icount_warp_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL_RT
,
608 icount_warp_rt
, NULL
);
611 icount_align_option
= qemu_opt_get_bool(opts
, "align", false);
613 if (icount_align_option
&& !icount_sleep
) {
614 error_setg(errp
, "align=on and sleep=no are incompatible");
616 if (strcmp(option
, "auto") != 0) {
618 icount_time_shift
= strtol(option
, &rem_str
, 0);
619 if (errno
!= 0 || *rem_str
!= '\0' || !strlen(option
)) {
620 error_setg(errp
, "icount: Invalid shift value");
624 } else if (icount_align_option
) {
625 error_setg(errp
, "shift=auto and align=on are incompatible");
626 } else if (!icount_sleep
) {
627 error_setg(errp
, "shift=auto and sleep=no are incompatible");
632 /* 125MIPS seems a reasonable initial guess at the guest speed.
633 It will be corrected fairly quickly anyway. */
634 icount_time_shift
= 3;
636 /* Have both realtime and virtual time triggers for speed adjustment.
637 The realtime trigger catches emulated time passing too slowly,
638 the virtual time trigger catches emulated time passing too fast.
639 Realtime triggers occur even when idle, so use them less frequently
641 icount_rt_timer
= timer_new_ms(QEMU_CLOCK_VIRTUAL_RT
,
642 icount_adjust_rt
, NULL
);
643 timer_mod(icount_rt_timer
,
644 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT
) + 1000);
645 icount_vm_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
,
646 icount_adjust_vm
, NULL
);
647 timer_mod(icount_vm_timer
,
648 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
649 get_ticks_per_sec() / 10);
652 /***********************************************************/
653 void hw_error(const char *fmt
, ...)
659 fprintf(stderr
, "qemu: hardware error: ");
660 vfprintf(stderr
, fmt
, ap
);
661 fprintf(stderr
, "\n");
663 fprintf(stderr
, "CPU #%d:\n", cpu
->cpu_index
);
664 cpu_dump_state(cpu
, stderr
, fprintf
, CPU_DUMP_FPU
);
670 void cpu_synchronize_all_states(void)
675 cpu_synchronize_state(cpu
);
679 void cpu_synchronize_all_post_reset(void)
684 cpu_synchronize_post_reset(cpu
);
688 void cpu_synchronize_all_post_init(void)
693 cpu_synchronize_post_init(cpu
);
697 static int do_vm_stop(RunState state
)
701 if (runstate_is_running()) {
705 vm_state_notify(0, state
);
706 qapi_event_send_stop(&error_abort
);
710 ret
= bdrv_flush_all();
715 static bool cpu_can_run(CPUState
*cpu
)
720 if (cpu_is_stopped(cpu
)) {
726 static void cpu_handle_guest_debug(CPUState
*cpu
)
728 gdb_set_stop_cpu(cpu
);
729 qemu_system_debug_request();
734 static void sigbus_reraise(void)
737 struct sigaction action
;
739 memset(&action
, 0, sizeof(action
));
740 action
.sa_handler
= SIG_DFL
;
741 if (!sigaction(SIGBUS
, &action
, NULL
)) {
744 sigaddset(&set
, SIGBUS
);
745 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
747 perror("Failed to re-raise SIGBUS!\n");
751 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
754 if (kvm_on_sigbus(siginfo
->ssi_code
,
755 (void *)(intptr_t)siginfo
->ssi_addr
)) {
760 static void qemu_init_sigbus(void)
762 struct sigaction action
;
764 memset(&action
, 0, sizeof(action
));
765 action
.sa_flags
= SA_SIGINFO
;
766 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
767 sigaction(SIGBUS
, &action
, NULL
);
769 prctl(PR_MCE_KILL
, PR_MCE_KILL_SET
, PR_MCE_KILL_EARLY
, 0, 0);
772 static void qemu_kvm_eat_signals(CPUState
*cpu
)
774 struct timespec ts
= { 0, 0 };
780 sigemptyset(&waitset
);
781 sigaddset(&waitset
, SIG_IPI
);
782 sigaddset(&waitset
, SIGBUS
);
785 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
786 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
787 perror("sigtimedwait");
793 if (kvm_on_sigbus_vcpu(cpu
, siginfo
.si_code
, siginfo
.si_addr
)) {
801 r
= sigpending(&chkset
);
803 perror("sigpending");
806 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
809 #else /* !CONFIG_LINUX */
811 static void qemu_init_sigbus(void)
815 static void qemu_kvm_eat_signals(CPUState
*cpu
)
818 #endif /* !CONFIG_LINUX */
821 static void dummy_signal(int sig
)
825 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
829 struct sigaction sigact
;
831 memset(&sigact
, 0, sizeof(sigact
));
832 sigact
.sa_handler
= dummy_signal
;
833 sigaction(SIG_IPI
, &sigact
, NULL
);
835 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
836 sigdelset(&set
, SIG_IPI
);
837 sigdelset(&set
, SIGBUS
);
838 r
= kvm_set_signal_mask(cpu
, &set
);
840 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
846 static void qemu_kvm_init_cpu_signals(CPUState
*cpu
)
852 static QemuMutex qemu_global_mutex
;
853 static QemuCond qemu_io_proceeded_cond
;
854 static unsigned iothread_requesting_mutex
;
856 static QemuThread io_thread
;
859 static QemuCond qemu_cpu_cond
;
861 static QemuCond qemu_pause_cond
;
862 static QemuCond qemu_work_cond
;
864 void qemu_init_cpu_loop(void)
867 qemu_cond_init(&qemu_cpu_cond
);
868 qemu_cond_init(&qemu_pause_cond
);
869 qemu_cond_init(&qemu_work_cond
);
870 qemu_cond_init(&qemu_io_proceeded_cond
);
871 qemu_mutex_init(&qemu_global_mutex
);
873 qemu_thread_get_self(&io_thread
);
876 void run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
878 struct qemu_work_item wi
;
880 if (qemu_cpu_is_self(cpu
)) {
889 qemu_mutex_lock(&cpu
->work_mutex
);
890 if (cpu
->queued_work_first
== NULL
) {
891 cpu
->queued_work_first
= &wi
;
893 cpu
->queued_work_last
->next
= &wi
;
895 cpu
->queued_work_last
= &wi
;
898 qemu_mutex_unlock(&cpu
->work_mutex
);
901 while (!atomic_mb_read(&wi
.done
)) {
902 CPUState
*self_cpu
= current_cpu
;
904 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
905 current_cpu
= self_cpu
;
909 void async_run_on_cpu(CPUState
*cpu
, void (*func
)(void *data
), void *data
)
911 struct qemu_work_item
*wi
;
913 if (qemu_cpu_is_self(cpu
)) {
918 wi
= g_malloc0(sizeof(struct qemu_work_item
));
923 qemu_mutex_lock(&cpu
->work_mutex
);
924 if (cpu
->queued_work_first
== NULL
) {
925 cpu
->queued_work_first
= wi
;
927 cpu
->queued_work_last
->next
= wi
;
929 cpu
->queued_work_last
= wi
;
932 qemu_mutex_unlock(&cpu
->work_mutex
);
937 static void flush_queued_work(CPUState
*cpu
)
939 struct qemu_work_item
*wi
;
941 if (cpu
->queued_work_first
== NULL
) {
945 qemu_mutex_lock(&cpu
->work_mutex
);
946 while (cpu
->queued_work_first
!= NULL
) {
947 wi
= cpu
->queued_work_first
;
948 cpu
->queued_work_first
= wi
->next
;
949 if (!cpu
->queued_work_first
) {
950 cpu
->queued_work_last
= NULL
;
952 qemu_mutex_unlock(&cpu
->work_mutex
);
954 qemu_mutex_lock(&cpu
->work_mutex
);
958 atomic_mb_set(&wi
->done
, true);
961 qemu_mutex_unlock(&cpu
->work_mutex
);
962 qemu_cond_broadcast(&qemu_work_cond
);
965 static void qemu_wait_io_event_common(CPUState
*cpu
)
970 qemu_cond_signal(&qemu_pause_cond
);
972 flush_queued_work(cpu
);
973 cpu
->thread_kicked
= false;
976 static void qemu_tcg_wait_io_event(CPUState
*cpu
)
978 while (all_cpu_threads_idle()) {
979 /* Start accounting real time to the virtual clock if the CPUs
981 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
982 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
985 while (iothread_requesting_mutex
) {
986 qemu_cond_wait(&qemu_io_proceeded_cond
, &qemu_global_mutex
);
990 qemu_wait_io_event_common(cpu
);
994 static void qemu_kvm_wait_io_event(CPUState
*cpu
)
996 while (cpu_thread_is_idle(cpu
)) {
997 qemu_cond_wait(cpu
->halt_cond
, &qemu_global_mutex
);
1000 qemu_kvm_eat_signals(cpu
);
1001 qemu_wait_io_event_common(cpu
);
1004 static void *qemu_kvm_cpu_thread_fn(void *arg
)
1006 CPUState
*cpu
= arg
;
1009 rcu_register_thread();
1011 qemu_mutex_lock_iothread();
1012 qemu_thread_get_self(cpu
->thread
);
1013 cpu
->thread_id
= qemu_get_thread_id();
1017 r
= kvm_init_vcpu(cpu
);
1019 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
1023 qemu_kvm_init_cpu_signals(cpu
);
1025 /* signal CPU creation */
1026 cpu
->created
= true;
1027 qemu_cond_signal(&qemu_cpu_cond
);
1030 if (cpu_can_run(cpu
)) {
1031 r
= kvm_cpu_exec(cpu
);
1032 if (r
== EXCP_DEBUG
) {
1033 cpu_handle_guest_debug(cpu
);
1036 qemu_kvm_wait_io_event(cpu
);
1042 static void *qemu_dummy_cpu_thread_fn(void *arg
)
1045 fprintf(stderr
, "qtest is not supported under Windows\n");
1048 CPUState
*cpu
= arg
;
1052 rcu_register_thread();
1054 qemu_mutex_lock_iothread();
1055 qemu_thread_get_self(cpu
->thread
);
1056 cpu
->thread_id
= qemu_get_thread_id();
1059 sigemptyset(&waitset
);
1060 sigaddset(&waitset
, SIG_IPI
);
1062 /* signal CPU creation */
1063 cpu
->created
= true;
1064 qemu_cond_signal(&qemu_cpu_cond
);
1069 qemu_mutex_unlock_iothread();
1072 r
= sigwait(&waitset
, &sig
);
1073 } while (r
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
1078 qemu_mutex_lock_iothread();
1080 qemu_wait_io_event_common(cpu
);
1087 static void tcg_exec_all(void);
1089 static void *qemu_tcg_cpu_thread_fn(void *arg
)
1091 CPUState
*cpu
= arg
;
1093 rcu_register_thread();
1095 qemu_mutex_lock_iothread();
1096 qemu_thread_get_self(cpu
->thread
);
1099 cpu
->thread_id
= qemu_get_thread_id();
1100 cpu
->created
= true;
1103 qemu_cond_signal(&qemu_cpu_cond
);
1105 /* wait for initial kick-off after machine start */
1106 while (first_cpu
->stopped
) {
1107 qemu_cond_wait(first_cpu
->halt_cond
, &qemu_global_mutex
);
1109 /* process any pending work */
1111 qemu_wait_io_event_common(cpu
);
1115 /* process any pending work */
1116 atomic_mb_set(&exit_request
, 1);
1122 int64_t deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1124 if (deadline
== 0) {
1125 qemu_clock_notify(QEMU_CLOCK_VIRTUAL
);
1128 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus
));
1134 static void qemu_cpu_kick_thread(CPUState
*cpu
)
1139 if (cpu
->thread_kicked
) {
1142 cpu
->thread_kicked
= true;
1143 err
= pthread_kill(cpu
->thread
->thread
, SIG_IPI
);
1145 fprintf(stderr
, "qemu:%s: %s", __func__
, strerror(err
));
1153 static void qemu_cpu_kick_no_halt(void)
1156 /* Ensure whatever caused the exit has reached the CPU threads before
1157 * writing exit_request.
1159 atomic_mb_set(&exit_request
, 1);
1160 cpu
= atomic_mb_read(&tcg_current_cpu
);
1166 void qemu_cpu_kick(CPUState
*cpu
)
1168 qemu_cond_broadcast(cpu
->halt_cond
);
1169 if (tcg_enabled()) {
1170 qemu_cpu_kick_no_halt();
1172 qemu_cpu_kick_thread(cpu
);
1176 void qemu_cpu_kick_self(void)
1178 assert(current_cpu
);
1179 qemu_cpu_kick_thread(current_cpu
);
1182 bool qemu_cpu_is_self(CPUState
*cpu
)
1184 return qemu_thread_is_self(cpu
->thread
);
1187 bool qemu_in_vcpu_thread(void)
1189 return current_cpu
&& qemu_cpu_is_self(current_cpu
);
1192 static __thread
bool iothread_locked
= false;
1194 bool qemu_mutex_iothread_locked(void)
1196 return iothread_locked
;
1199 void qemu_mutex_lock_iothread(void)
1201 atomic_inc(&iothread_requesting_mutex
);
1202 /* In the simple case there is no need to bump the VCPU thread out of
1203 * TCG code execution.
1205 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1206 !first_cpu
|| !first_cpu
->created
) {
1207 qemu_mutex_lock(&qemu_global_mutex
);
1208 atomic_dec(&iothread_requesting_mutex
);
1210 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
1211 qemu_cpu_kick_no_halt();
1212 qemu_mutex_lock(&qemu_global_mutex
);
1214 atomic_dec(&iothread_requesting_mutex
);
1215 qemu_cond_broadcast(&qemu_io_proceeded_cond
);
1217 iothread_locked
= true;
1220 void qemu_mutex_unlock_iothread(void)
1222 iothread_locked
= false;
1223 qemu_mutex_unlock(&qemu_global_mutex
);
1226 static int all_vcpus_paused(void)
1231 if (!cpu
->stopped
) {
1239 void pause_all_vcpus(void)
1243 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, false);
1249 if (qemu_in_vcpu_thread()) {
1251 if (!kvm_enabled()) {
1254 cpu
->stopped
= true;
1260 while (!all_vcpus_paused()) {
1261 qemu_cond_wait(&qemu_pause_cond
, &qemu_global_mutex
);
1268 void cpu_resume(CPUState
*cpu
)
1271 cpu
->stopped
= false;
1275 void resume_all_vcpus(void)
1279 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
, true);
1285 /* For temporary buffers for forming a name */
1286 #define VCPU_THREAD_NAME_SIZE 16
1288 static void qemu_tcg_init_vcpu(CPUState
*cpu
)
1290 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1291 static QemuCond
*tcg_halt_cond
;
1292 static QemuThread
*tcg_cpu_thread
;
1294 tcg_cpu_address_space_init(cpu
, cpu
->as
);
1296 /* share a single thread for all cpus with TCG */
1297 if (!tcg_cpu_thread
) {
1298 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1299 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1300 qemu_cond_init(cpu
->halt_cond
);
1301 tcg_halt_cond
= cpu
->halt_cond
;
1302 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/TCG",
1304 qemu_thread_create(cpu
->thread
, thread_name
, qemu_tcg_cpu_thread_fn
,
1305 cpu
, QEMU_THREAD_JOINABLE
);
1307 cpu
->hThread
= qemu_thread_get_handle(cpu
->thread
);
1309 while (!cpu
->created
) {
1310 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1312 tcg_cpu_thread
= cpu
->thread
;
1314 cpu
->thread
= tcg_cpu_thread
;
1315 cpu
->halt_cond
= tcg_halt_cond
;
1319 static void qemu_kvm_start_vcpu(CPUState
*cpu
)
1321 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1323 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1324 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1325 qemu_cond_init(cpu
->halt_cond
);
1326 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/KVM",
1328 qemu_thread_create(cpu
->thread
, thread_name
, qemu_kvm_cpu_thread_fn
,
1329 cpu
, QEMU_THREAD_JOINABLE
);
1330 while (!cpu
->created
) {
1331 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1335 static void qemu_dummy_start_vcpu(CPUState
*cpu
)
1337 char thread_name
[VCPU_THREAD_NAME_SIZE
];
1339 cpu
->thread
= g_malloc0(sizeof(QemuThread
));
1340 cpu
->halt_cond
= g_malloc0(sizeof(QemuCond
));
1341 qemu_cond_init(cpu
->halt_cond
);
1342 snprintf(thread_name
, VCPU_THREAD_NAME_SIZE
, "CPU %d/DUMMY",
1344 qemu_thread_create(cpu
->thread
, thread_name
, qemu_dummy_cpu_thread_fn
, cpu
,
1345 QEMU_THREAD_JOINABLE
);
1346 while (!cpu
->created
) {
1347 qemu_cond_wait(&qemu_cpu_cond
, &qemu_global_mutex
);
1351 void qemu_init_vcpu(CPUState
*cpu
)
1353 cpu
->nr_cores
= smp_cores
;
1354 cpu
->nr_threads
= smp_threads
;
1355 cpu
->stopped
= true;
1356 if (kvm_enabled()) {
1357 qemu_kvm_start_vcpu(cpu
);
1358 } else if (tcg_enabled()) {
1359 qemu_tcg_init_vcpu(cpu
);
1361 qemu_dummy_start_vcpu(cpu
);
1365 void cpu_stop_current(void)
1368 current_cpu
->stop
= false;
1369 current_cpu
->stopped
= true;
1370 cpu_exit(current_cpu
);
1371 qemu_cond_signal(&qemu_pause_cond
);
1375 int vm_stop(RunState state
)
1377 if (qemu_in_vcpu_thread()) {
1378 qemu_system_vmstop_request_prepare();
1379 qemu_system_vmstop_request(state
);
1381 * FIXME: should not return to device code in case
1382 * vm_stop() has been requested.
1388 return do_vm_stop(state
);
1391 /* does a state transition even if the VM is already stopped,
1392 current state is forgotten forever */
1393 int vm_stop_force_state(RunState state
)
1395 if (runstate_is_running()) {
1396 return vm_stop(state
);
1398 runstate_set(state
);
1399 /* Make sure to return an error if the flush in a previous vm_stop()
1401 return bdrv_flush_all();
1405 static int tcg_cpu_exec(CPUState
*cpu
)
1408 #ifdef CONFIG_PROFILER
1412 #ifdef CONFIG_PROFILER
1413 ti
= profile_getclock();
1419 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1420 + cpu
->icount_extra
);
1421 cpu
->icount_decr
.u16
.low
= 0;
1422 cpu
->icount_extra
= 0;
1423 deadline
= qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL
);
1425 /* Maintain prior (possibly buggy) behaviour where if no deadline
1426 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1427 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1430 if ((deadline
< 0) || (deadline
> INT32_MAX
)) {
1431 deadline
= INT32_MAX
;
1434 count
= qemu_icount_round(deadline
);
1435 timers_state
.qemu_icount
+= count
;
1436 decr
= (count
> 0xffff) ? 0xffff : count
;
1438 cpu
->icount_decr
.u16
.low
= decr
;
1439 cpu
->icount_extra
= count
;
1441 ret
= cpu_exec(cpu
);
1442 #ifdef CONFIG_PROFILER
1443 tcg_time
+= profile_getclock() - ti
;
1446 /* Fold pending instructions back into the
1447 instruction counter, and clear the interrupt flag. */
1448 timers_state
.qemu_icount
-= (cpu
->icount_decr
.u16
.low
1449 + cpu
->icount_extra
);
1450 cpu
->icount_decr
.u32
= 0;
1451 cpu
->icount_extra
= 0;
1456 static void tcg_exec_all(void)
1460 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1461 qemu_clock_warp(QEMU_CLOCK_VIRTUAL
);
1463 if (next_cpu
== NULL
) {
1464 next_cpu
= first_cpu
;
1466 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= CPU_NEXT(next_cpu
)) {
1467 CPUState
*cpu
= next_cpu
;
1469 qemu_clock_enable(QEMU_CLOCK_VIRTUAL
,
1470 (cpu
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
1472 if (cpu_can_run(cpu
)) {
1473 r
= tcg_cpu_exec(cpu
);
1474 if (r
== EXCP_DEBUG
) {
1475 cpu_handle_guest_debug(cpu
);
1478 } else if (cpu
->stop
|| cpu
->stopped
) {
1483 /* Pairs with smp_wmb in qemu_cpu_kick. */
1484 atomic_mb_set(&exit_request
, 0);
1487 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1489 /* XXX: implement xxx_cpu_list for targets that still miss it */
1490 #if defined(cpu_list)
1491 cpu_list(f
, cpu_fprintf
);
1495 CpuInfoList
*qmp_query_cpus(Error
**errp
)
1497 CpuInfoList
*head
= NULL
, *cur_item
= NULL
;
1502 #if defined(TARGET_I386)
1503 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1504 CPUX86State
*env
= &x86_cpu
->env
;
1505 #elif defined(TARGET_PPC)
1506 PowerPCCPU
*ppc_cpu
= POWERPC_CPU(cpu
);
1507 CPUPPCState
*env
= &ppc_cpu
->env
;
1508 #elif defined(TARGET_SPARC)
1509 SPARCCPU
*sparc_cpu
= SPARC_CPU(cpu
);
1510 CPUSPARCState
*env
= &sparc_cpu
->env
;
1511 #elif defined(TARGET_MIPS)
1512 MIPSCPU
*mips_cpu
= MIPS_CPU(cpu
);
1513 CPUMIPSState
*env
= &mips_cpu
->env
;
1514 #elif defined(TARGET_TRICORE)
1515 TriCoreCPU
*tricore_cpu
= TRICORE_CPU(cpu
);
1516 CPUTriCoreState
*env
= &tricore_cpu
->env
;
1519 cpu_synchronize_state(cpu
);
1521 info
= g_malloc0(sizeof(*info
));
1522 info
->value
= g_malloc0(sizeof(*info
->value
));
1523 info
->value
->CPU
= cpu
->cpu_index
;
1524 info
->value
->current
= (cpu
== first_cpu
);
1525 info
->value
->halted
= cpu
->halted
;
1526 info
->value
->qom_path
= object_get_canonical_path(OBJECT(cpu
));
1527 info
->value
->thread_id
= cpu
->thread_id
;
1528 #if defined(TARGET_I386)
1529 info
->value
->has_pc
= true;
1530 info
->value
->pc
= env
->eip
+ env
->segs
[R_CS
].base
;
1531 #elif defined(TARGET_PPC)
1532 info
->value
->has_nip
= true;
1533 info
->value
->nip
= env
->nip
;
1534 #elif defined(TARGET_SPARC)
1535 info
->value
->has_pc
= true;
1536 info
->value
->pc
= env
->pc
;
1537 info
->value
->has_npc
= true;
1538 info
->value
->npc
= env
->npc
;
1539 #elif defined(TARGET_MIPS)
1540 info
->value
->has_PC
= true;
1541 info
->value
->PC
= env
->active_tc
.PC
;
1542 #elif defined(TARGET_TRICORE)
1543 info
->value
->has_PC
= true;
1544 info
->value
->PC
= env
->PC
;
1547 /* XXX: waiting for the qapi to support GSList */
1549 head
= cur_item
= info
;
1551 cur_item
->next
= info
;
1559 void qmp_memsave(int64_t addr
, int64_t size
, const char *filename
,
1560 bool has_cpu
, int64_t cpu_index
, Error
**errp
)
1566 int64_t orig_addr
= addr
, orig_size
= size
;
1572 cpu
= qemu_get_cpu(cpu_index
);
1574 error_setg(errp
, QERR_INVALID_PARAMETER_VALUE
, "cpu-index",
1579 f
= fopen(filename
, "wb");
1581 error_setg_file_open(errp
, errno
, filename
);
1589 if (cpu_memory_rw_debug(cpu
, addr
, buf
, l
, 0) != 0) {
1590 error_setg(errp
, "Invalid addr 0x%016" PRIx64
"/size %" PRId64
1591 " specified", orig_addr
, orig_size
);
1594 if (fwrite(buf
, 1, l
, f
) != l
) {
1595 error_setg(errp
, QERR_IO_ERROR
);
1606 void qmp_pmemsave(int64_t addr
, int64_t size
, const char *filename
,
1613 f
= fopen(filename
, "wb");
1615 error_setg_file_open(errp
, errno
, filename
);
1623 cpu_physical_memory_read(addr
, buf
, l
);
1624 if (fwrite(buf
, 1, l
, f
) != l
) {
1625 error_setg(errp
, QERR_IO_ERROR
);
1636 void qmp_inject_nmi(Error
**errp
)
1638 #if defined(TARGET_I386)
1642 X86CPU
*cpu
= X86_CPU(cs
);
1644 if (!cpu
->apic_state
) {
1645 cpu_interrupt(cs
, CPU_INTERRUPT_NMI
);
1647 apic_deliver_nmi(cpu
->apic_state
);
1651 nmi_monitor_handle(monitor_get_cpu_index(), errp
);
1655 void dump_drift_info(FILE *f
, fprintf_function cpu_fprintf
)
1661 cpu_fprintf(f
, "Host - Guest clock %"PRIi64
" ms\n",
1662 (cpu_get_clock() - cpu_get_icount())/SCALE_MS
);
1663 if (icount_align_option
) {
1664 cpu_fprintf(f
, "Max guest delay %"PRIi64
" ms\n", -max_delay
/SCALE_MS
);
1665 cpu_fprintf(f
, "Max guest advance %"PRIi64
" ms\n", max_advance
/SCALE_MS
);
1667 cpu_fprintf(f
, "Max guest delay NA\n");
1668 cpu_fprintf(f
, "Max guest advance NA\n");