4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
38 #include <sys/prctl.h>
/* Signal number used in this file as the "kick" signal: it is installed with
 * sigaction() and sent with qemu_thread_signal().
 * NOTE(review): two definitions are visible; presumably each one sits in a
 * different branch of a preprocessor conditional -- confirm. */
42 #define SIG_IPI (SIGRTMIN+4)
44 #define SIG_IPI SIGUSR1
/* Numeric value of the prctl() option passed in block_io_signals().
 * NOTE(review): presumably a fallback for headers lacking PR_MCE_KILL. */
48 #define PR_MCE_KILL 33
/* File-local cursor into the CPU list.  cpu_exec_all() advances it and
 * restarts from first_cpu when it is NULL. */
51 static CPUState
*next_cpu
;
53 /***********************************************************/
/* Report a hardware error on stderr.
 *
 * Prints "qemu: hardware error: " followed by the printf-style message built
 * from fmt and the variadic arguments, then walks the first_cpu list and
 * dumps the state of every CPU, each preceded by a "CPU #<index>:" header.
 *
 * NOTE(review): two cpu_dump_state() calls are visible (flags X86_DUMP_FPU
 * and 0); presumably they are alternatives chosen by a target conditional. */
54 void hw_error(const char *fmt
, ...)
60 fprintf(stderr
, "qemu: hardware error: ");
61 vfprintf(stderr
, fmt
, ap
);
62 fprintf(stderr
, "\n");
63 for(env
= first_cpu
; env
!= NULL
; env
= env
->next_cpu
) {
64 fprintf(stderr
, "CPU #%d:\n", env
->cpu_index
);
66 cpu_dump_state(env
, stderr
, fprintf
, X86_DUMP_FPU
);
68 cpu_dump_state(env
, stderr
, fprintf
, 0);
/* Call cpu_synchronize_state() on every CPU in the first_cpu list. */
75 void cpu_synchronize_all_states(void)
79 for (cpu
= first_cpu
; cpu
; cpu
= cpu
->next_cpu
) {
80 cpu_synchronize_state(cpu
);
/* Call cpu_synchronize_post_reset() on every CPU in the first_cpu list. */
84 void cpu_synchronize_all_post_reset(void)
88 for (cpu
= first_cpu
; cpu
; cpu
= cpu
->next_cpu
) {
89 cpu_synchronize_post_reset(cpu
);
/* Call cpu_synchronize_post_init() on every CPU in the first_cpu list. */
93 void cpu_synchronize_all_post_init(void)
97 for (cpu
= first_cpu
; cpu
; cpu
= cpu
->next_cpu
) {
98 cpu_synchronize_post_init(cpu
);
/* Return non-zero when the VM is not running (vm_running is zero) or when
 * env's own "stopped" flag is set. */
102 int cpu_is_stopped(CPUState
*env
)
104 return !vm_running
|| env
->stopped
;
/* Stop-side bookkeeping for the VM.
 *
 * reason: forwarded to vm_state_notify() as the stop reason.
 *
 * The visible calls notify state listeners with running == 0 and emit a
 * QEVENT_STOP monitor event.
 * NOTE(review): any guard around these calls is not visible here. */
107 static void do_vm_stop(int reason
)
113 vm_state_notify(0, reason
);
116 monitor_protocol_event(QEVENT_STOP
, NULL
);
/* Tell whether env may execute guest code.
 *
 * The visible test checks env->stopped and !vm_running.
 * NOTE(review): presumably that case yields 0 and everything else 1. */
120 static int cpu_can_run(CPUState
*env
)
124 if (env
->stopped
|| !vm_running
)
/* Tell whether env has something to do.
 *
 * Checks, in order: a pending queued work item (queued_work_first), the CPU
 * being stopped or the VM not running, and qemu_cpu_has_work(env).
 * NOTE(review): the value returned for each check is not visible here. */
129 static int cpu_has_work(CPUState
*env
)
133 if (env
->queued_work_first
)
135 if (env
->stopped
|| !vm_running
)
139 if (qemu_cpu_has_work(env
))
/* Walk the first_cpu list and test every CPU with cpu_has_work().
 * NOTE(review): presumably returns 1 on the first hit and 0 otherwise. */
144 static int any_cpu_has_work(void)
148 for (env
= first_cpu
; env
!= NULL
; env
= env
->next_cpu
)
149 if (cpu_has_work(env
))
/* Debug exception handler registered through cpu_set_debug_excp_handler().
 * Records env as the CPU gdb should report and sets debug_requested to
 * EXCP_DEBUG. */
154 static void cpu_debug_handler(CPUState
*env
)
156 gdb_set_stop_cpu(env
);
157 debug_requested
= EXCP_DEBUG
;
/* Descriptor written by qemu_event_increment(); -1 until qemu_event_init()
 * stores fds[1] here. */
162 static int io_thread_fd
= -1;
/* Post a wakeup by writing an 8-byte value of 1 to io_thread_fd.
 *
 * The write is retried while it fails with EINTR.  A failure with EAGAIN is
 * tolerated; any other failure is reported on stderr.
 * NOTE(review): the io_thread_fd == -1 check presumably returns early when
 * the descriptor has not been set up yet. */
164 static void qemu_event_increment(void)
166 /* Write 8 bytes to be compatible with eventfd. */
167 static const uint64_t val
= 1;
170 if (io_thread_fd
== -1)
174 ret
= write(io_thread_fd
, &val
, sizeof(val
));
175 } while (ret
< 0 && errno
== EINTR
);
177 /* EAGAIN is fine, a read must be pending. */
178 if (ret
< 0 && errno
!= EAGAIN
) {
/* Message text corrected: it previously read "write() filed". */
179 fprintf(stderr
, "qemu_event_increment: write() failed: %s\n",
/* Read handler for the wakeup descriptor.
 *
 * opaque: the file descriptor, carried as an integer cast to a pointer.
 *
 * Keeps reading into buffer while the read is interrupted (EINTR) or fills
 * the whole buffer, so all pending data is consumed. */
185 static void qemu_event_read(void *opaque
)
187 int fd
= (unsigned long)opaque
;
191 /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */
193 len
= read(fd
, buffer
, sizeof(buffer
));
194 } while ((len
== -1 && errno
== EINTR
) || len
== sizeof(buffer
));
/* Set up the wakeup descriptors.
 *
 * Obtains a descriptor pair from qemu_eventfd(), marks both ends
 * O_NONBLOCK, registers qemu_event_read() as read handler for fds[0] (the fd
 * is passed as the opaque argument) and stores fds[1] in io_thread_fd.
 *
 * NOTE(review): error checks on err and the return value are not visible;
 * presumably 0 on success and the failing call's error otherwise. */
197 static int qemu_event_init(void)
202 err
= qemu_eventfd(fds
);
206 err
= fcntl_setfl(fds
[0], O_NONBLOCK
);
210 err
= fcntl_setfl(fds
[1], O_NONBLOCK
);
214 qemu_set_fd_handler2(fds
[0], NULL
, qemu_event_read
, NULL
,
215 (void *)(unsigned long)fds
[0]);
217 io_thread_fd
= fds
[1];
/* Handler installed for SIG_IPI by qemu_kvm_init_cpu_signals().
 * NOTE(review): the body is not visible; presumably empty. */
226 static void dummy_signal(int sig
)
/* Win32 event object created by qemu_event_init() and signalled by
 * qemu_event_increment(). */
232 HANDLE qemu_event_handle
;
/* Callback registered for qemu_event_handle via qemu_add_wait_object().
 * NOTE(review): the body is not visible; presumably empty. */
234 static void dummy_event_handler(void *opaque
)
/* Win32 variant: create the wakeup event and register it as a wait object.
 *
 * CreateEvent() is called with bManualReset and bInitialState both FALSE,
 * i.e. an auto-reset event that starts non-signalled.  On failure the
 * GetLastError() code is printed to stderr.
 * NOTE(review): return values are not visible here. */
238 static int qemu_event_init(void)
240 qemu_event_handle
= CreateEvent(NULL
, FALSE
, FALSE
, NULL
);
241 if (!qemu_event_handle
) {
242 fprintf(stderr
, "Failed CreateEvent: %ld\n", GetLastError());
245 qemu_add_wait_object(qemu_event_handle
, dummy_event_handler
, NULL
);
/* Win32 variant: signal qemu_event_handle with SetEvent() and report a
 * failure on stderr. */
249 static void qemu_event_increment(void)
251 if (!SetEvent(qemu_event_handle
)) {
252 fprintf(stderr
, "qemu_event_increment: SetEvent failed: %ld\n",
259 #ifndef CONFIG_IOTHREAD
/* Signal setup for a KVM vCPU in the build without CONFIG_IOTHREAD.
 *
 * Steps visible here:
 *  - install dummy_signal as the SIG_IPI handler;
 *  - block SIG_IPI in the calling thread;
 *  - read the thread's current mask (SIG_BLOCK with a NULL new set), remove
 *    SIG_IPI and SIGBUS from that copy and pass it to kvm_set_signal_mask();
 *  - print strerror(-r) when kvm_set_signal_mask() reports an error.
 * NOTE(review): what happens after the error message is not visible. */
260 static void qemu_kvm_init_cpu_signals(CPUState
*env
)
265 struct sigaction sigact
;
267 memset(&sigact
, 0, sizeof(sigact
));
268 sigact
.sa_handler
= dummy_signal
;
269 sigaction(SIG_IPI
, &sigact
, NULL
);
272 sigaddset(&set
, SIG_IPI
);
273 pthread_sigmask(SIG_BLOCK
, &set
, NULL
);
275 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
276 sigdelset(&set
, SIG_IPI
);
277 sigdelset(&set
, SIGBUS
);
278 r
= kvm_set_signal_mask(env
, &set
);
280 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
/* Main-loop setup without CONFIG_IOTHREAD: register cpu_debug_handler as
 * the debug exception handler and return the result of qemu_event_init(). */
286 int qemu_init_main_loop(void)
288 cpu_set_debug_excp_handler(cpu_debug_handler
);
290 return qemu_event_init();
/* Variant for the build without CONFIG_IOTHREAD.
 * NOTE(review): the body is not visible; presumably empty. */
293 void qemu_main_loop_start(void)
/* Initialise a vCPU in the build without CONFIG_IOTHREAD.
 *
 * _env: the CPUState, passed as void *.
 *
 * Copies smp_cores and smp_threads into env, then calls kvm_init_vcpu()
 * (printing strerror(-r) on failure) and qemu_kvm_init_cpu_signals().
 * NOTE(review): the KVM calls are presumably guarded by a "KVM enabled"
 * check that is not visible here. */
297 void qemu_init_vcpu(void *_env
)
299 CPUState
*env
= _env
;
302 env
->nr_cores
= smp_cores
;
303 env
->nr_threads
= smp_threads
;
306 r
= kvm_init_vcpu(env
);
308 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
311 qemu_kvm_init_cpu_signals(env
);
/* Variant for the build without CONFIG_IOTHREAD.
 * NOTE(review): the body is not visible; presumably returns a constant. */
315 int qemu_cpu_self(void *env
)
/* Variant for the build without CONFIG_IOTHREAD.
 * NOTE(review): the body is not visible; presumably calls func(data)
 * directly. */
320 void run_on_cpu(CPUState
*env
, void (*func
)(void *data
), void *data
)
/* Variant for the build without CONFIG_IOTHREAD.
 * NOTE(review): the body is not visible; presumably empty. */
325 void resume_all_vcpus(void)
/* Variant for the build without CONFIG_IOTHREAD.
 * NOTE(review): the body is not visible; presumably empty. */
329 void pause_all_vcpus(void)
/* Variant for the build without CONFIG_IOTHREAD.
 * NOTE(review): the body is not visible; presumably empty. */
333 void qemu_cpu_kick(void *env
)
/* Variant for the build without CONFIG_IOTHREAD: post a wakeup with
 * qemu_event_increment(), then compare cpu_single_env against next_cpu.
 * NOTE(review): the actions taken on env and next_cpu are not visible. */
338 void qemu_notify_event(void)
340 CPUState
*env
= cpu_single_env
;
342 qemu_event_increment ();
346 if (next_cpu
&& env
!= next_cpu
) {
/* No-op in the build without CONFIG_IOTHREAD. */
352 void qemu_mutex_lock_iothread(void) {}
/* No-op in the build without CONFIG_IOTHREAD. */
353 void qemu_mutex_unlock_iothread(void) {}
/* Variant for the build without CONFIG_IOTHREAD.
 * NOTE(review): the body is not visible; presumably empty. */
355 void cpu_stop_current(void)
/* Variant for the build without CONFIG_IOTHREAD.
 * NOTE(review): the body is not visible; presumably calls do_vm_stop(). */
359 void vm_stop(int reason
)
364 #else /* CONFIG_IOTHREAD */
366 #include "qemu-thread.h"
/* Global lock; taken in qemu_init_main_loop() and used as the mutex for
 * every condition wait in this file. */
368 QemuMutex qemu_global_mutex
;
/* Second lock used by qemu_tcg_wait_io_event() and
 * qemu_mutex_lock_iothread() so the global mutex is handed over fairly. */
369 static QemuMutex qemu_fair_mutex
;
/* Identity of the thread that ran qemu_init_main_loop(); vm_stop() compares
 * the caller against it. */
371 static QemuThread io_thread
;
/* Thread and halt condition shared by all TCG CPUs (see tcg_init_vcpu()). */
373 static QemuThread
*tcg_cpu_thread
;
374 static QemuCond
*tcg_halt_cond
;
/* Set to 1 by qemu_main_loop_start(); vCPU threads wait for it. */
376 static int qemu_system_ready
;
/* Signalled by vCPU threads after start-up; waited on by the creators. */
378 static QemuCond qemu_cpu_cond
;
/* Broadcast by qemu_main_loop_start() together with qemu_system_ready. */
380 static QemuCond qemu_system_cond
;
/* Signalled from qemu_wait_io_event_common(); waited on by
 * pause_all_vcpus(). */
381 static QemuCond qemu_pause_cond
;
/* Broadcast by flush_queued_work(); waited on by run_on_cpu(). */
382 static QemuCond qemu_work_cond
;
384 /* If we have signalfd, we mask out the signals we want to handle and then
385 * use signalfd to listen for them. We rely on whatever the current signal
386 * handler is to dispatch the signals when we receive them.
/* Read handler for the signalfd descriptor.
 *
 * opaque: the file descriptor, carried as an integer cast to a pointer.
 *
 * Reads one qemu_signalfd_siginfo record (retrying on EINTR).  A read that
 * fails with EAGAIN and a read of unexpected length are handled specially
 * (the latter prints a message).  Otherwise the current disposition of the
 * signal is fetched with sigaction() and invoked directly: sa_sigaction when
 * SA_SIGINFO is set, else sa_handler.
 * NOTE(review): an enclosing loop and the exits taken in the two special
 * cases are not visible here. */
388 static void sigfd_handler(void *opaque
)
390 int fd
= (unsigned long) opaque
;
391 struct qemu_signalfd_siginfo info
;
392 struct sigaction action
;
397 len
= read(fd
, &info
, sizeof(info
));
398 } while (len
== -1 && errno
== EINTR
);
400 if (len
== -1 && errno
== EAGAIN
) {
404 if (len
!= sizeof(info
)) {
405 printf("read from sigfd returned %zd: %m\n", len
);
409 sigaction(info
.ssi_signo
, NULL
, &action
);
410 if ((action
.sa_flags
& SA_SIGINFO
) && action
.sa_sigaction
) {
411 action
.sa_sigaction(info
.ssi_signo
,
412 (siginfo_t
*)&info
, NULL
);
413 } else if (action
.sa_handler
) {
414 action
.sa_handler(info
.ssi_signo
);
/* SIG_IPI handler installed by qemu_tcg_init_cpu_signals(): when
 * cpu_single_env is set, call cpu_exit() on it. */
419 static void cpu_signal(int sig
)
421 if (cpu_single_env
) {
422 cpu_exit(cpu_single_env
);
/* Signal setup for a KVM vCPU thread in the CONFIG_IOTHREAD build.
 *
 * Installs dummy_signal as the SIG_IPI handler, reads the calling thread's
 * current signal mask (SIG_BLOCK with a NULL new set), removes SIG_IPI and
 * SIGBUS from that copy and passes it to kvm_set_signal_mask().  An error
 * from kvm_set_signal_mask() is printed with strerror(-r).
 * NOTE(review): what happens after the error message is not visible. */
427 static void qemu_kvm_init_cpu_signals(CPUState
*env
)
431 struct sigaction sigact
;
433 memset(&sigact
, 0, sizeof(sigact
));
434 sigact
.sa_handler
= dummy_signal
;
435 sigaction(SIG_IPI
, &sigact
, NULL
);
437 pthread_sigmask(SIG_BLOCK
, NULL
, &set
);
438 sigdelset(&set
, SIG_IPI
);
439 sigdelset(&set
, SIGBUS
);
440 r
= kvm_set_signal_mask(env
, &set
);
442 fprintf(stderr
, "kvm_set_signal_mask: %s\n", strerror(-r
));
/* Signal setup for the TCG CPU thread: install cpu_signal as the SIG_IPI
 * handler and unblock SIG_IPI in the calling thread. */
447 static void qemu_tcg_init_cpu_signals(void)
450 struct sigaction sigact
;
452 memset(&sigact
, 0, sizeof(sigact
));
453 sigact
.sa_handler
= cpu_signal
;
454 sigaction(SIG_IPI
, &sigact
, NULL
);
457 sigaddset(&set
, SIG_IPI
);
458 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
/* Forward declaration; block_io_signals() installs this handler before its
 * definition appears further down. */
461 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
/* Configure the calling thread's signal mask and the SIGBUS handler.
 *
 * Steps visible here:
 *  - unblock SIGUSR2;
 *  - block SIGIO, SIGALRM, SIG_IPI and SIGBUS;
 *  - install sigbus_handler for SIGBUS as an SA_SIGINFO handler;
 *  - call prctl(PR_MCE_KILL, 1, 1, 0, 0).
 *
 * Returns a sigset_t.
 * NOTE(review): presumably the set of blocked signals, since the caller
 * passes the result to qemu_signalfd_init() -- confirm. */
464 static sigset_t
block_io_signals(void)
467 struct sigaction action
;
469 /* SIGUSR2 used by posix-aio-compat.c */
471 sigaddset(&set
, SIGUSR2
);
472 pthread_sigmask(SIG_UNBLOCK
, &set
, NULL
);
475 sigaddset(&set
, SIGIO
);
476 sigaddset(&set
, SIGALRM
);
477 sigaddset(&set
, SIG_IPI
);
478 sigaddset(&set
, SIGBUS
);
479 pthread_sigmask(SIG_BLOCK
, &set
, NULL
);
481 memset(&action
, 0, sizeof(action
));
482 action
.sa_flags
= SA_SIGINFO
;
483 action
.sa_sigaction
= (void (*)(int, siginfo_t
*, void*))sigbus_handler
;
484 sigaction(SIGBUS
, &action
, NULL
);
485 prctl(PR_MCE_KILL
, 1, 1, 0, 0);
/* Create a signalfd for the signals in mask and hook it into the main loop.
 *
 * The descriptor from qemu_signalfd() is made non-blocking and
 * sigfd_handler() is registered as its read handler, with the fd as the
 * opaque argument.  A creation failure is reported on stderr.
 * NOTE(review): return values are not visible here. */
490 static int qemu_signalfd_init(sigset_t mask
)
494 sigfd
= qemu_signalfd(&mask
);
496 fprintf(stderr
, "failed to create signalfd\n");
500 fcntl_setfl(sigfd
, O_NONBLOCK
);
502 qemu_set_fd_handler2(sigfd
, NULL
, sigfd_handler
, NULL
,
503 (void *)(unsigned long) sigfd
);
/* Main-loop setup for the CONFIG_IOTHREAD build.
 *
 * In order: register the debug exception handler, block the I/O signals,
 * create the signalfd for them, create the wakeup event, initialise the
 * pause/system condition variables and the fair/global mutexes, take the
 * global mutex, and record the calling thread in io_thread.
 * NOTE(review): the checks on ret and the return value are not visible. */
508 int qemu_init_main_loop(void)
511 sigset_t blocked_signals
;
513 cpu_set_debug_excp_handler(cpu_debug_handler
);
515 blocked_signals
= block_io_signals();
517 ret
= qemu_signalfd_init(blocked_signals
);
521 /* Note eventfd must be drained before signalfd handlers run */
522 ret
= qemu_event_init();
526 qemu_cond_init(&qemu_pause_cond
);
527 qemu_cond_init(&qemu_system_cond
);
528 qemu_mutex_init(&qemu_fair_mutex
);
529 qemu_mutex_init(&qemu_global_mutex
);
530 qemu_mutex_lock(&qemu_global_mutex
);
532 qemu_thread_self(&io_thread
);
/* Mark the system ready and wake every thread waiting on
 * qemu_system_cond. */
537 void qemu_main_loop_start(void)
539 qemu_system_ready
= 1;
540 qemu_cond_broadcast(&qemu_system_cond
);
/* Arrange for func(data) to run in the context of env's thread.
 *
 * When the caller already is env's thread (qemu_cpu_self()), a shortcut is
 * taken.  Otherwise the on-stack work item wi is appended to env's queue
 * (queued_work_first / queued_work_last) and the caller waits on
 * qemu_work_cond.  cpu_single_env is saved before each wait and restored
 * afterwards.
 * NOTE(review): the shortcut body, the initialisation of wi, the kick of the
 * target CPU and the wait-loop condition are not visible here. */
543 void run_on_cpu(CPUState
*env
, void (*func
)(void *data
), void *data
)
545 struct qemu_work_item wi
;
547 if (qemu_cpu_self(env
)) {
554 if (!env
->queued_work_first
)
555 env
->queued_work_first
= &wi
;
557 env
->queued_work_last
->next
= &wi
;
558 env
->queued_work_last
= &wi
;
564 CPUState
*self_env
= cpu_single_env
;
566 qemu_cond_wait(&qemu_work_cond
, &qemu_global_mutex
);
567 cpu_single_env
= self_env
;
/* Drain env's queue of work items.
 *
 * Pops items from queued_work_first one at a time, then clears
 * queued_work_last and broadcasts qemu_work_cond so waiters in run_on_cpu()
 * wake up.
 * NOTE(review): the per-item processing and the action of the initial
 * empty-queue check are not visible here. */
571 static void flush_queued_work(CPUState
*env
)
573 struct qemu_work_item
*wi
;
575 if (!env
->queued_work_first
)
578 while ((wi
= env
->queued_work_first
)) {
579 env
->queued_work_first
= wi
->next
;
583 env
->queued_work_last
= NULL
;
584 qemu_cond_broadcast(&qemu_work_cond
);
/* Per-CPU housekeeping shared by the TCG and KVM wait paths: signal
 * qemu_pause_cond, flush env's queued work and clear env->thread_kicked.
 * NOTE(review): the condition guarding the qemu_pause_cond signal is not
 * visible here. */
587 static void qemu_wait_io_event_common(CPUState
*env
)
592 qemu_cond_signal(&qemu_pause_cond
);
594 flush_queued_work(env
);
595 env
->thread_kicked
= false;
/* Idle path of the TCG CPU thread.
 *
 * Waits on tcg_halt_cond (timeout argument 1000) until some CPU has work.
 * It then releases the global mutex, takes and releases qemu_fair_mutex so
 * another waiter can get the global mutex first, re-takes the global mutex,
 * and runs qemu_wait_io_event_common() on every CPU. */
598 static void qemu_tcg_wait_io_event(void)
602 while (!any_cpu_has_work())
603 qemu_cond_timedwait(tcg_halt_cond
, &qemu_global_mutex
, 1000);
605 qemu_mutex_unlock(&qemu_global_mutex
);
608 * Users of qemu_global_mutex can be starved, having no chance
609 * to acquire it since this path will get to it first.
610 * So use another lock to provide fairness.
612 qemu_mutex_lock(&qemu_fair_mutex
);
613 qemu_mutex_unlock(&qemu_fair_mutex
);
615 qemu_mutex_lock(&qemu_global_mutex
);
617 for (env
= first_cpu
; env
!= NULL
; env
= env
->next_cpu
) {
618 qemu_wait_io_event_common(env
);
/* Restore the default SIGBUS disposition and unblock SIGBUS.
 *
 * If installing SIG_DFL succeeds, SIGBUS is removed from the blocked set
 * with sigprocmask().  The perror() call reports that re-raising did not
 * work.
 * NOTE(review): the raise() itself and the final abort path are not visible
 * here. */
622 static void sigbus_reraise(void)
625 struct sigaction action
;
627 memset(&action
, 0, sizeof(action
));
628 action
.sa_handler
= SIG_DFL
;
629 if (!sigaction(SIGBUS
, &action
, NULL
)) {
632 sigaddset(&set
, SIGBUS
);
633 sigprocmask(SIG_UNBLOCK
, &set
, NULL
);
/* perror() appends ": <errno text>\n" itself, so the prefix must not end in
 * a newline or the message is split across two lines. */
635 perror("Failed to re-raise SIGBUS!");
/* SIGBUS handler installed by block_io_signals(): forwards the signal code
 * (ssi_code) and faulting address (ssi_addr) to kvm_on_sigbus().
 * NOTE(review): the remaining parameter and the action taken when
 * kvm_on_sigbus() returns non-zero are not visible here. */
639 static void sigbus_handler(int n
, struct qemu_signalfd_siginfo
*siginfo
,
642 if (kvm_on_sigbus(siginfo
->ssi_code
, (void *)(intptr_t)siginfo
->ssi_addr
)) {
/* Consume pending SIG_IPI and SIGBUS signals for a KVM vCPU thread.
 *
 * Polls with sigtimedwait() using a zero timeout, so it does not block.
 * Errors other than EAGAIN/EINTR are reported with perror().  A received
 * signal's si_code and si_addr are handed to kvm_on_sigbus_vcpu().  The loop
 * repeats while sigpending() still lists SIG_IPI or SIGBUS.
 * NOTE(review): the dispatch on the signal number and the exits taken after
 * the perror() calls are not visible here. */
647 static void qemu_kvm_eat_signals(CPUState
*env
)
649 struct timespec ts
= { 0, 0 };
655 sigemptyset(&waitset
);
656 sigaddset(&waitset
, SIG_IPI
);
657 sigaddset(&waitset
, SIGBUS
);
660 r
= sigtimedwait(&waitset
, &siginfo
, &ts
);
661 if (r
== -1 && !(errno
== EAGAIN
|| errno
== EINTR
)) {
662 perror("sigtimedwait");
668 if (kvm_on_sigbus_vcpu(env
, siginfo
.si_code
, siginfo
.si_addr
)) {
676 r
= sigpending(&chkset
);
678 perror("sigpending");
681 } while (sigismember(&chkset
, SIG_IPI
) || sigismember(&chkset
, SIGBUS
));
/* Idle path of a KVM vCPU thread: wait on env->halt_cond (timeout argument
 * 1000) until cpu_has_work(env), then consume pending signals and run the
 * common per-CPU housekeeping. */
684 static void qemu_kvm_wait_io_event(CPUState
*env
)
686 while (!cpu_has_work(env
))
687 qemu_cond_timedwait(env
->halt_cond
, &qemu_global_mutex
, 1000);
689 qemu_kvm_eat_signals(env
);
690 qemu_wait_io_event_common(env
);
/* Forward declaration; the definition appears later in this file. */
693 static int qemu_cpu_exec(CPUState
*env
);
/* Thread entry point for one KVM vCPU.
 *
 * Takes the global mutex, records its thread identity in env->thread,
 * initialises the vCPU (printing strerror(-r) on failure) and its signal
 * setup, signals qemu_cpu_cond for the creator, and waits until
 * qemu_system_ready is set.  Its loop then tests cpu_can_run(env) and calls
 * qemu_kvm_wait_io_event(env).
 * NOTE(review): arg is presumably the CPUState; the loop construct, the
 * call made when cpu_can_run() is true and the env->created update are not
 * visible here. */
695 static void *kvm_cpu_thread_fn(void *arg
)
700 qemu_mutex_lock(&qemu_global_mutex
);
701 qemu_thread_self(env
->thread
);
703 r
= kvm_init_vcpu(env
);
705 fprintf(stderr
, "kvm_init_vcpu failed: %s\n", strerror(-r
));
709 qemu_kvm_init_cpu_signals(env
);
711 /* signal CPU creation */
713 qemu_cond_signal(&qemu_cpu_cond
);
715 /* and wait for machine initialization */
716 while (!qemu_system_ready
)
717 qemu_cond_timedwait(&qemu_system_cond
, &qemu_global_mutex
, 100);
720 if (cpu_can_run(env
))
722 qemu_kvm_wait_io_event(env
);
/* Thread entry point for the single thread that serves all TCG CPUs.
 *
 * Sets up SIG_IPI handling, records its thread identity in env->thread,
 * takes the global mutex, walks every CPU on the first_cpu list, signals
 * qemu_cpu_cond for the creator, and waits until qemu_system_ready is set.
 * Its loop then calls qemu_tcg_wait_io_event().
 * NOTE(review): arg is presumably the first CPUState; the body of the
 * per-CPU loop and the execution call in the main loop are not visible. */
728 static void *tcg_cpu_thread_fn(void *arg
)
732 qemu_tcg_init_cpu_signals();
733 qemu_thread_self(env
->thread
);
735 /* signal CPU creation */
736 qemu_mutex_lock(&qemu_global_mutex
);
737 for (env
= first_cpu
; env
!= NULL
; env
= env
->next_cpu
)
739 qemu_cond_signal(&qemu_cpu_cond
);
741 /* and wait for machine initialization */
742 while (!qemu_system_ready
)
743 qemu_cond_timedwait(&qemu_system_cond
, &qemu_global_mutex
, 100);
747 qemu_tcg_wait_io_event();
/* Wake the thread serving a CPU.
 *
 * _env: the CPUState, passed as void *.
 *
 * Broadcasts env->halt_cond.  If the thread has not already been kicked, it
 * also sends SIG_IPI and sets thread_kicked, so repeated kicks do not send
 * further signals until the flag is cleared again. */
753 void qemu_cpu_kick(void *_env
)
755 CPUState
*env
= _env
;
756 qemu_cond_broadcast(env
->halt_cond
);
757 if (!env
->thread_kicked
) {
758 qemu_thread_signal(env
->thread
, SIG_IPI
);
759 env
->thread_kicked
= true;
/* Return the result of comparing the calling thread with env->thread via
 * qemu_thread_equal().
 *
 * _env: the CPUState, passed as void *. */
763 int qemu_cpu_self(void *_env
)
765 CPUState
*env
= _env
;
768 qemu_thread_self(&this);
770 return qemu_thread_equal(&this, env
->thread
);
/* Take the global mutex on behalf of the I/O thread.
 *
 * Two paths are visible.  One locks qemu_global_mutex directly.  The other
 * takes qemu_fair_mutex, tries the global mutex without blocking, and if
 * that fails signals the TCG thread with SIG_IPI before blocking on the
 * global mutex; qemu_fair_mutex is released afterwards.
 * NOTE(review): the condition selecting between the two paths is not
 * visible here. */
773 void qemu_mutex_lock_iothread(void)
776 qemu_mutex_lock(&qemu_global_mutex
);
778 qemu_mutex_lock(&qemu_fair_mutex
);
779 if (qemu_mutex_trylock(&qemu_global_mutex
)) {
780 qemu_thread_signal(tcg_cpu_thread
, SIG_IPI
);
781 qemu_mutex_lock(&qemu_global_mutex
);
783 qemu_mutex_unlock(&qemu_fair_mutex
);
/* Release the global mutex. */
787 void qemu_mutex_unlock_iothread(void)
789 qemu_mutex_unlock(&qemu_global_mutex
);
/* Walk the CPU list starting at first_cpu.
 * NOTE(review): the per-CPU test and the return values are not visible;
 * presumably returns 0 as soon as a CPU is not stopped, else 1. */
792 static int all_vcpus_paused(void)
794 CPUState
*penv
= first_cpu
;
799 penv
= (CPUState
*)penv
->next_cpu
;
/* Ask every vCPU to pause and wait until all_vcpus_paused() holds.
 *
 * A first pass walks the CPU list.  The function then waits on
 * qemu_pause_cond (timeout argument 100) and walks the list again on each
 * wake-up until every CPU reports paused.
 * NOTE(review): what each list pass does to a CPU is not visible here. */
805 void pause_all_vcpus(void)
807 CPUState
*penv
= first_cpu
;
812 penv
= (CPUState
*)penv
->next_cpu
;
815 while (!all_vcpus_paused()) {
816 qemu_cond_timedwait(&qemu_pause_cond
, &qemu_global_mutex
, 100);
820 penv
= (CPUState
*)penv
->next_cpu
;
/* Walk the CPU list starting at first_cpu.
 * NOTE(review): the per-CPU action is not visible; presumably clears the
 * stop flags and kicks each CPU. */
825 void resume_all_vcpus(void)
827 CPUState
*penv
= first_cpu
;
833 penv
= (CPUState
*)penv
->next_cpu
;
/* Attach a CPU to the shared TCG thread.
 *
 * _env: the CPUState, passed as void *.
 *
 * For the first CPU (tcg_cpu_thread still NULL) this allocates the thread
 * object and halt condition, starts tcg_cpu_thread_fn, waits on
 * qemu_cpu_cond until env->created becomes non-zero, and publishes the
 * thread and condition in tcg_cpu_thread / tcg_halt_cond.  The other path
 * reuses those shared objects. */
837 static void tcg_init_vcpu(void *_env
)
839 CPUState
*env
= _env
;
840 /* share a single thread for all cpus with TCG */
841 if (!tcg_cpu_thread
) {
842 env
->thread
= qemu_mallocz(sizeof(QemuThread
));
843 env
->halt_cond
= qemu_mallocz(sizeof(QemuCond
));
844 qemu_cond_init(env
->halt_cond
);
845 qemu_thread_create(env
->thread
, tcg_cpu_thread_fn
, env
);
846 while (env
->created
== 0)
847 qemu_cond_timedwait(&qemu_cpu_cond
, &qemu_global_mutex
, 100);
848 tcg_cpu_thread
= env
->thread
;
849 tcg_halt_cond
= env
->halt_cond
;
851 env
->thread
= tcg_cpu_thread
;
852 env
->halt_cond
= tcg_halt_cond
;
/* Create the dedicated thread for a KVM vCPU: allocate the thread object and
 * halt condition, start kvm_cpu_thread_fn with env as its argument, and wait
 * on qemu_cpu_cond until env->created becomes non-zero. */
856 static void kvm_start_vcpu(CPUState
*env
)
858 env
->thread
= qemu_mallocz(sizeof(QemuThread
));
859 env
->halt_cond
= qemu_mallocz(sizeof(QemuCond
));
860 qemu_cond_init(env
->halt_cond
);
861 qemu_thread_create(env
->thread
, kvm_cpu_thread_fn
, env
);
862 while (env
->created
== 0)
863 qemu_cond_timedwait(&qemu_cpu_cond
, &qemu_global_mutex
, 100);
/* Initialise a vCPU in the CONFIG_IOTHREAD build.
 *
 * _env: the CPUState, passed as void *.
 *
 * Copies smp_cores and smp_threads into env.
 * NOTE(review): the call that starts the thread is not visible; presumably
 * kvm_start_vcpu() or tcg_init_vcpu() depending on the accelerator. */
866 void qemu_init_vcpu(void *_env
)
868 CPUState
*env
= _env
;
870 env
->nr_cores
= smp_cores
;
871 env
->nr_threads
= smp_threads
;
/* Post a wakeup to the main loop via qemu_event_increment(). */
878 void qemu_notify_event(void)
880 qemu_event_increment();
/* Record a pending VM stop by storing reason in vmstop_requested.
 * NOTE(review): any follow-up notification is not visible here. */
883 static void qemu_system_vmstop_request(int reason
)
885 vmstop_requested
= reason
;
/* When cpu_single_env is set, mark that CPU as stopped and call cpu_exit()
 * on it. */
889 void cpu_stop_current(void)
891 if (cpu_single_env
) {
892 cpu_single_env
->stopped
= 1;
893 cpu_exit(cpu_single_env
);
/* Stop the VM for the given reason.
 *
 * When the caller is not io_thread, the stop is only recorded through
 * qemu_system_vmstop_request(reason).
 * NOTE(review): the rest of that branch and the path taken on io_thread are
 * not visible; presumably the latter calls do_vm_stop(reason). */
897 void vm_stop(int reason
)
900 qemu_thread_self(&me
);
902 if (!qemu_thread_equal(&me
, &io_thread
)) {
903 qemu_system_vmstop_request(reason
);
905 * FIXME: should not return to device code in case
906 * vm_stop() has been requested.
/* Execute guest code on env once, with instruction-count budgeting.
 *
 * Before execution, the unused budget (icount_decr.u16.low + icount_extra)
 * is subtracted from qemu_icount and both fields are cleared.  A new budget
 * is taken from qemu_icount_round(qemu_next_deadline()) and added to
 * qemu_icount; at most 0xffff of it goes into the 16-bit decrementer.
 *
 * After execution, whatever budget remains is subtracted from qemu_icount
 * again and the counter fields are cleared (the u32 store also clears the
 * interrupt flag, per the comment below).
 *
 * With CONFIG_PROFILER the elapsed profile_getclock() time is added to
 * qemu_time.
 *
 * NOTE(review): the execution call, the return value, any guard around the
 * icount code and the adjustment of count before it is stored in
 * icount_extra are not visible here. */
916 static int qemu_cpu_exec(CPUState
*env
)
919 #ifdef CONFIG_PROFILER
923 #ifdef CONFIG_PROFILER
924 ti
= profile_getclock();
929 qemu_icount
-= (env
->icount_decr
.u16
.low
+ env
->icount_extra
);
930 env
->icount_decr
.u16
.low
= 0;
931 env
->icount_extra
= 0;
932 count
= qemu_icount_round (qemu_next_deadline());
933 qemu_icount
+= count
;
934 decr
= (count
> 0xffff) ? 0xffff : count
;
936 env
->icount_decr
.u16
.low
= decr
;
937 env
->icount_extra
= count
;
940 #ifdef CONFIG_PROFILER
941 qemu_time
+= profile_getclock() - ti
;
944 /* Fold pending instructions back into the
945 instruction counter, and clear the interrupt flag. */
946 qemu_icount
-= (env
->icount_decr
.u16
.low
947 + env
->icount_extra
);
948 env
->icount_decr
.u32
= 0;
949 env
->icount_extra
= 0;
/* Run the CPUs round-robin, resuming at next_cpu.
 *
 * Starts over at first_cpu when next_cpu is NULL and advances until the
 * list ends or exit_request is set.  For each CPU, vm_clock is enabled
 * unless SSTEP_NOTIMER is set in singlestep_enabled.  The loop body also
 * checks qemu_alarm_pending(), and runs qemu_cpu_exec() on CPUs that
 * cpu_can_run(), watching for EXCP_DEBUG; otherwise it checks env->stop.
 *
 * Returns any_cpu_has_work().
 * NOTE(review): the actions inside the alarm, EXCP_DEBUG and env->stop
 * branches are not visible here (presumably they break out of the loop). */
954 bool cpu_exec_all(void)
956 if (next_cpu
== NULL
)
957 next_cpu
= first_cpu
;
958 for (; next_cpu
!= NULL
&& !exit_request
; next_cpu
= next_cpu
->next_cpu
) {
959 CPUState
*env
= next_cpu
;
961 qemu_clock_enable(vm_clock
,
962 (env
->singlestep_enabled
& SSTEP_NOTIMER
) == 0);
964 if (qemu_alarm_pending())
966 if (cpu_can_run(env
)) {
967 if (qemu_cpu_exec(env
) == EXCP_DEBUG
) {
970 } else if (env
->stop
) {
975 return any_cpu_has_work();
/* Match every CPU on the first_cpu list against the per-node CPU masks.
 *
 * For each CPU, node_cpumask[0 .. nb_numa_nodes-1] is scanned for a mask
 * that has the bit numbered env->cpu_index set.
 * NOTE(review): the action taken on a match is not visible here; presumably
 * it records i as the CPU's NUMA node. */
978 void set_numa_modes(void)
983 for (env
= first_cpu
; env
!= NULL
; env
= env
->next_cpu
) {
984 for (i
= 0; i
< nb_numa_nodes
; i
++) {
/* Build the bit as unsigned long long: shifting a plain int 1 is undefined
 * for cpu_index >= 31 and could never test the upper bits of a wider mask. */
985 if (node_cpumask
[i
] & (1ULL << env
->cpu_index
)) {
/* Parse a log-item option string.
 *
 * optarg: the string handed to cpu_str_to_log_mask().
 *
 * The visible fallback prints the heading "Log items (comma separated):"
 * and then every entry of cpu_log_items (name and help text), stopping at
 * the entry whose mask is 0.
 * NOTE(review): the condition that triggers the listing and the use of a
 * valid mask are not visible here. */
992 void set_cpu_log(const char *optarg
)
995 const CPULogItem
*item
;
997 mask
= cpu_str_to_log_mask(optarg
);
999 printf("Log items (comma separated):\n");
1000 for (item
= cpu_log_items
; item
->mask
!= 0; item
++) {
1001 printf("%-10s %s\n", item
->name
, item
->help
);
1008 /* Return the virtual CPU time, based on the instruction counter. */
/* The result is qemu_icount_bias + (icount << icount_time_shift), where
 * icount starts as qemu_icount and has the current CPU's unexecuted budget
 * (icount_decr.u16.low + icount_extra) subtracted.  "Bad clock read" is
 * printed when can_do_io(env) is false.
 * NOTE(review): the guard on env being non-NULL is not visible here. */
1009 int64_t cpu_get_icount(void)
1012 CPUState
*env
= cpu_single_env
;
1014 icount
= qemu_icount
;
1016 if (!can_do_io(env
)) {
1017 fprintf(stderr
, "Bad clock read\n");
1019 icount
-= (env
->icount_decr
.u16
.low
+ env
->icount_extra
);
1021 return qemu_icount_bias
+ (icount
<< icount_time_shift
);
1024 void list_cpus(FILE *f
, fprintf_function cpu_fprintf
, const char *optarg
)
1026 /* XXX: implement xxx_cpu_list for targets that still miss it */
1027 #if defined(cpu_list_id)
1028 cpu_list_id(f
, cpu_fprintf
, optarg
);
1029 #elif defined(cpu_list)
1030 cpu_list(f
, cpu_fprintf
); /* deprecated */