/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"

#include "qemu/compatfd.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */
static CPUState *next_cpu;

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || qemu_cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}
/***********************************************************/
/* guest cycle counter */

/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10
/* Compensate for varying guest execution speed.  */
static int64_t qemu_icount_bias;
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
static int64_t vm_clock_warp_start;
static int64_t qemu_icount;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this seqlock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;
} TimersState;

static TimersState timers_state;
/* Return the virtual CPU time, based on the instruction counter.  */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = qemu_icount;
    if (cpu) {
        CPUArchState *env = cpu->env_ptr;
        if (!can_do_io(env)) {
            fprintf(stderr, "Bad clock read\n");
        }
        icount -= (env->icount_decr.u16.low + env->icount_extra);
    }
    return qemu_icount_bias + (icount << icount_time_shift);
}
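
/* Illustrative note, not part of the original source: with the default
 * icount_time_shift of 3, each completed guest instruction accounts for
 * 2^3 = 8 ns of QEMU_CLOCK_VIRTUAL, i.e. a nominal 125 MIPS guest.  A
 * hypothetical helper making the conversion explicit would be:
 *
 *     static inline int64_t insns_to_ns(int64_t insns)
 *     {
 *         return insns << icount_time_shift;    // 1 insn -> 2^shift ns
 *     }
 *
 * cpu_get_icount() above is exactly this conversion plus qemu_icount_bias,
 * the offset that icount_adjust() and the warp timer move to keep virtual
 * time in step with real time.
 */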
/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    if (!timers_state.cpu_ticks_enabled) {
        return timers_state.cpu_ticks_offset;
    }

    ticks = cpu_get_real_ticks();
    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non increasing ticks may happen if the host uses
           software suspend.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
    }
    timers_state.cpu_ticks_prev = ticks;
    return ticks + timers_state.cpu_ticks_offset;
}
static int64_t cpu_get_clock_locked(void)
{
    int64_t ti;

    if (!timers_state.cpu_ticks_enabled) {
        ti = timers_state.cpu_clock_offset;
    } else {
        ti = get_clock();
        ti += timers_state.cpu_clock_offset;
    }

    return ti;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}
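
/* Illustrative note, not part of the original source: readers that may run
 * outside the BQL use the seqlock retry loop above, while writers hold the
 * BQL and bracket the update, as cpu_enable_ticks()/cpu_disable_ticks()
 * below do:
 *
 *     seqlock_write_lock(&timers_state.vm_clock_seqlock);
 *     timers_state.cpu_clock_offset -= get_clock();
 *     seqlock_write_unlock(&timers_state.vm_clock_seqlock);
 *
 * A torn read is detected by seqlock_read_retry() and simply repeated.
 */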
/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset.  */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset.  */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset = cpu_get_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}
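
/* Illustrative example, not part of the original source: the two offsets make
 * the guest-visible clocks freeze while ticks are disabled.  Assuming the
 * caller holds the BQL:
 *
 *     cpu_disable_ticks();
 *     int64_t frozen = cpu_get_clock();   // returns the saved offset
 *     // ... any amount of host time passes ...
 *     assert(cpu_get_clock() == frozen);  // still frozen
 *     cpu_enable_ticks();                 // clock resumes where it stopped
 */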
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    cur_time = cpu_get_clock();
    cur_icount = cpu_get_icount();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
}
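
/* Illustrative example, not part of the original source: suppose the guest
 * has executed 1e9 instructions with icount_time_shift == 3, so virtual time
 * has advanced 8e9 ns while only 4e9 ns of real time have passed.  delta is
 * then +4e9 ns; once delta * 2 exceeds last_delta + ICOUNT_WOBBLE, the shift
 * drops to 2 (4 ns per instruction) and qemu_icount_bias is recomputed so
 * that cpu_get_icount() stays continuous across the change.
 */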
static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
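
/* Illustrative example, not part of the original source: qemu_icount_round()
 * turns a nanosecond budget into a whole number of instructions, rounding
 * up.  With icount_time_shift == 3 (8 ns per instruction) a 20 ns deadline
 * gives (20 + 7) >> 3 == 3 instructions, so the vCPU can overshoot the next
 * QEMU_CLOCK_VIRTUAL deadline by less than one instruction's worth of time.
 */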
static void icount_warp_rt(void *opaque)
{
    if (vm_clock_warp_start == -1) {
        return;
    }

    if (runstate_is_running()) {
        int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock();
            int64_t cur_icount = cpu_get_icount();
            int64_t delta = cur_time - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    assert(qtest_enabled());
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = MIN(dest - clock, deadline);
        qemu_icount_bias += warp;
        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
void qemu_clock_warp(QEMUClockType type)
{
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
     * This ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest QEMU_CLOCK_VIRTUAL timer.
     */
    icount_warp_rt(NULL);
    if (!all_cpu_threads_idle() || !qemu_clock_has_timers(QEMU_CLOCK_VIRTUAL)) {
        timer_del(icount_warp_timer);
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    vm_clock_warp_start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    /* We want to use the earliest deadline from ALL vm_clocks */
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

    /* Maintain prior (possibly buggy) behaviour where if no deadline
     * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
     * INT32_MAX nanoseconds ahead, we still use INT32_MAX
     * nanoseconds.
     */
    if ((deadline < 0) || (deadline > INT32_MAX)) {
        deadline = INT32_MAX;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance
         * QEMU_CLOCK_VIRTUAL.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
         * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
         * event.  Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
         * after some "real" time (related to the time left until the next
         * event) has passed.  The QEMU_CLOCK_REALTIME timer will do this.
         * This keeps the warps from being visible externally; for example,
         * you will not be sending network packets continuously instead of
         * every 100 ms.
         */
        timer_mod(icount_warp_timer, vm_clock_warp_start + deadline);
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}
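
/* Illustrative example, not part of the original source: if every vCPU is
 * idle and the next QEMU_CLOCK_VIRTUAL timer is 5 ms away, the warp timer is
 * armed on QEMU_CLOCK_REALTIME 5 ms from now.  When it fires,
 * icount_warp_rt() adds (up to) those 5 ms to qemu_icount_bias, so virtual
 * time reaches the timer deadline even though no guest instructions ran.
 */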
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    }
};
void configure_icount(const char *option)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    if (!option) {
        return;
    }

    icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
                                     icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        icount_time_shift = strtol(option, NULL, 0);
        use_icount = 1;
        return;
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}
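
/* Usage sketch, not part of the original source: this is reached from
 * command-line handling of the -icount option, so roughly
 *
 *     qemu-system-x86_64 -icount 3      // fixed 2^3 ns per instruction
 *     qemu-system-x86_64 -icount auto   // adaptive, shift tuned at runtime
 *
 * would end up as configure_icount("3") and configure_icount("auto")
 * respectively; the "auto" form enables the two adjustment timers above.
 */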
/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}
void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}
static int do_vm_stop(RunState state)
{
    int ret;

    if (runstate_is_running()) {
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        monitor_protocol_event(QEVENT_STOP, NULL);
    }

    ret = bdrv_flush_all();
    return ret;
}
static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

static void cpu_signal(int sig)
{
    if (current_cpu) {
        cpu_exit(current_cpu);
    }
    exit_request = 1;
}
#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}
static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}
#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}
static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */
static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static bool iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

static QemuCond qemu_cpu_cond;
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;

    qemu_cpu_kick(cpu);
}
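
/* Usage sketch, not part of the original source: code running in the
 * iothread that needs to touch per-CPU state typically wraps the access in a
 * helper and hands it to run_on_cpu(), which executes it on the target vCPU
 * thread and waits for completion:
 *
 *     static void poke_cpu(void *data)        // hypothetical helper
 *     {
 *         CPUState *cs = data;
 *         (void)cs;                            // e.g. inspect or kick cs here
 *     }
 *
 *     run_on_cpu(cs, poke_cpu, cs);            // blocks until done
 *
 * async_run_on_cpu() queues the same kind of work item but returns at once;
 * its heap-allocated item is released by flush_queued_work() below.
 */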
static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}
static void qemu_tcg_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle.  */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        int sig;

        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}
static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (QTAILQ_FIRST(&cpus)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}
void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }
}

void qemu_cpu_kick_self(void)
{
    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

static bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}
void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}

void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}
static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}
void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
                           QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}
void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}
void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
    }
    qemu_cond_signal(&qemu_pause_cond);
}
int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed.  */
        return bdrv_flush_all();
    }
}
static int tcg_cpu_exec(CPUArchState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;

        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        env->icount_decr.u16.low = decr;
        env->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }
    return ret;
}
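
/* Illustrative example, not part of the original source: with
 * icount_time_shift == 3 and the next timer 1,000,000 ns away,
 * qemu_icount_round() yields a budget of 125,000 instructions.  The low
 * 16 bits (at most 0xffff = 65,535) go into icount_decr.u16.low, the
 * remaining 59,465 into icount_extra, and whatever is still unexecuted when
 * cpu_exec() returns is folded back out of qemu_icount above.
 */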
static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}
void set_numa_modes(void)
{
    CPUState *cpu;
    int i;

    CPU_FOREACH(cpu) {
        for (i = 0; i < nb_numa_nodes; i++) {
            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
                cpu->numa_node = i;
            }
        }
    }
}
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}
CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
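
/* Usage sketch, not part of the original source: this implements the QMP
 * query-cpus command and returns one entry per vCPU, roughly:
 *
 *     -> { "execute": "query-cpus" }
 *     <- { "return": [ { "CPU": 0, "current": true, "halted": false,
 *                        "pc": 1048576, "thread_id": 12345 } ] }
 *
 * The program-counter field name depends on the target (pc, nip, npc, PC),
 * as the conditionals above show.
 */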
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (f == NULL) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_memory_rw_debug(cpu, addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (f == NULL) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_rw(addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
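
/* Usage sketch, not part of the original source: these back the QMP memsave
 * and pmemsave commands, e.g.
 *
 *     { "execute": "memsave",
 *       "arguments": { "val": 4096, "size": 256, "filename": "/tmp/vmem" } }
 *
 * memsave walks the selected vCPU's virtual addresses via
 * cpu_memory_rw_debug(), while pmemsave reads guest-physical addresses via
 * cpu_physical_memory_rw().
 */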
void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);
        CPUX86State *env = &cpu->env;

        if (!env->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(env->apic_state);
        }
    }
#elif defined(TARGET_S390X)
    CPUState *cs;
    S390CPU *cpu;

    CPU_FOREACH(cs) {
        cpu = S390_CPU(cs);
        if (cpu->env.cpu_num == monitor_get_cpu_index()) {
            if (s390_cpu_restart(S390_CPU(cs)) == -1) {
                error_set(errp, QERR_UNSUPPORTED);
                return;
            }
            break;
        }
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}