/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor.h"
#include "sysemu.h"
#include "gdbstub.h"
#include "dma.h"
#include "kvm.h"

#include "qemu-thread.h"
#include "cpus.h"

#ifndef _WIN32
#include "compatfd.h"
#endif

#ifdef SIGRTMIN
#define SIG_IPI (SIGRTMIN+4)
#else
#define SIG_IPI SIGUSR1
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;

/***********************************************************/
/* guest cycle counter */

/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
/* Compensate for varying guest execution speed. */
static int64_t qemu_icount_bias;
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
static int64_t vm_clock_warp_start;
static int64_t qemu_icount;

typedef struct TimersState {
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;
} TimersState;

TimersState timers_state;

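/*
 * Added commentary, not in the original source: with -icount, virtual
 * time is derived from the number of executed instructions as
 *
 *     vm_clock (ns) = qemu_icount_bias + (qemu_icount << icount_time_shift)
 *
 * so icount_time_shift == N means one instruction accounts for 2^N ns,
 * i.e. a simulated speed of roughly 1000/2^N MIPS.  MAX_ICOUNT_SHIFT of
 * 10 therefore corresponds to the ~1 MIPS minimum mentioned above.
 */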
/* Return the virtual CPU time, based on the instruction counter. */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    CPUState *env = cpu_single_env;

    icount = qemu_icount;
    if (env) {
        if (!can_do_io(env)) {
            fprintf(stderr, "Bad clock read\n");
        }
        icount -= (env->icount_decr.u16.low + env->icount_extra);
    }
    return qemu_icount_bias + (icount << icount_time_shift);
}

/* return the host CPU cycle counter and handle stop/restart */
int64_t cpu_get_ticks(void)
{
    if (use_icount) {
        return cpu_get_icount();
    }
    if (!timers_state.cpu_ticks_enabled) {
        return timers_state.cpu_ticks_offset;
    } else {
        int64_t ticks;
        ticks = cpu_get_real_ticks();
        if (timers_state.cpu_ticks_prev > ticks) {
            /* Note: non-increasing ticks may happen if the host uses
               software suspend. */
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        }
        timers_state.cpu_ticks_prev = ticks;
        return ticks + timers_state.cpu_ticks_offset;
    }
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    if (!timers_state.cpu_ticks_enabled) {
        return timers_state.cpu_clock_offset;
    } else {
        ti = get_clock();
        return ti + timers_state.cpu_clock_offset;
    }
}

/* enable cpu_get_ticks() */
void cpu_enable_ticks(void)
{
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
}

/* disable cpu_get_ticks(): the clock is stopped.  You must not call
   cpu_get_ticks() after that. */
void cpu_disable_ticks(void)
{
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset = cpu_get_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock();
        timers_state.cpu_ticks_enabled = 0;
    }
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle, real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

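/*
 * Added commentary: icount_adjust() is a simple feedback controller.
 * It compares virtual time (cur_icount) against real time (cur_time);
 * if the guest has drifted more than ICOUNT_WOBBLE ahead or behind
 * since the last check, it steps icount_time_shift down or up (halving
 * or doubling the ns-per-instruction rate), then recomputes
 * qemu_icount_bias so that the adjusted clock remains continuous.
 */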
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;
    static int64_t last_delta;
    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }
    cur_time = cpu_get_clock();
    cur_icount = qemu_get_clock_ns(vm_clock);
    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
}

static void icount_adjust_rt(void *opaque)
{
    qemu_mod_timer(icount_rt_timer,
                   qemu_get_clock_ms(rt_clock) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    qemu_mod_timer(icount_vm_timer,
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
    icount_adjust();
}

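/*
 * Added commentary: qemu_icount_round() converts a vm_clock deadline in
 * nanoseconds into an instruction budget, rounding up so the deadline
 * is never undershot.  For example, with icount_time_shift == 3 (8 ns
 * per instruction), a 100 ns deadline rounds up to (100 + 7) >> 3 = 13
 * instructions.
 */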
static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

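/*
 * Added commentary: while all vCPUs sleep, vm_clock stalls because no
 * instructions run.  qemu_clock_warp() below arms icount_warp_timer;
 * when it fires, icount_warp_rt() folds the elapsed real time into
 * qemu_icount_bias, "warping" virtual time forward to the pending
 * deadline instead of letting it stand still.
 */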
static void icount_warp_rt(void *opaque)
{
    if (vm_clock_warp_start == -1) {
        return;
    }

    if (runstate_is_running()) {
        int64_t clock = qemu_get_clock_ns(rt_clock);
        int64_t warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 1) {
            qemu_icount_bias += warp_delta;
        } else {
            /*
             * In adaptive mode, do not let the vm_clock run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock();
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
            int64_t delta = cur_time - cur_icount;
            qemu_icount_bias += MIN(warp_delta, delta);
        }
        if (qemu_clock_expired(vm_clock)) {
            qemu_notify_event();
        }
    }
    vm_clock_warp_start = -1;
}

void qemu_clock_warp(QEMUClock *clock)
{
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks.  But a clock argument removes the
     * need for if statements all over the place.
     */
    if (clock != vm_clock || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the vm_clock timer now.  This
     * ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest vm_clock timer.
     */
    icount_warp_rt(NULL);
    if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    deadline = qemu_clock_deadline(vm_clock);
    if (deadline > 0) {
        /*
         * Ensure the vm_clock proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * vm_clock.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending vm_clock timer; rather,
         * time could just advance to the next vm_clock event.  Instead, we
         * do stop VCPUs and only advance vm_clock after some "real" time
         * (related to the time left until the next event) has passed.  The
         * rt_clock timer will take care of this.  This keeps the warps from
         * being too visible externally---for example, you will not be
         * sending network packets continuously instead of every 100ms.
         */
        qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
    } else {
        qemu_notify_event();
    }
}

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    }
};

void configure_icount(const char *option)
{
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    if (!option) {
        return;
    }

    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        icount_time_shift = strtol(option, NULL, 0);
        use_icount = 1;
        return;
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
    qemu_mod_timer(icount_rt_timer,
                   qemu_get_clock_ms(rt_clock) + 1000);
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
    qemu_mod_timer(icount_vm_timer,
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
}

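/*
 * Added commentary: configure_icount() backs the -icount command-line
 * option.  "-icount N" fixes 2^N ns per instruction (use_icount == 1),
 * while "-icount auto" starts from shift 3 (~125 MIPS) and lets the two
 * timers above re-tune it at runtime (use_icount == 2), e.g.:
 *
 *     qemu-system-x86_64 -icount auto ...
 *     qemu-system-x86_64 -icount 3 ...
 */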
/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *env;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        fprintf(stderr, "CPU #%d:\n", env->cpu_index);
#ifdef TARGET_I386
        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
#else
        cpu_dump_state(env, stderr, fprintf, 0);
#endif
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

int cpu_is_stopped(CPUState *env)
{
    return !runstate_is_running() || env->stopped;
}

static void do_vm_stop(RunState state)
{
    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qemu_aio_flush();
        bdrv_flush_all();
        monitor_protocol_event(QEVENT_STOP, NULL);
    }
}

static int cpu_can_run(CPUState *env)
{
    if (env->stop) {
        return 0;
    }
    if (env->stopped || !runstate_is_running()) {
        return 0;
    }
    return 1;
}

static bool cpu_thread_is_idle(CPUState *env)
{
    if (env->stop || env->queued_work_first) {
        return false;
    }
    if (env->stopped || !runstate_is_running()) {
        return true;
    }
    if (!env->halted || qemu_cpu_has_work(env) ||
        (kvm_enabled() && kvm_irqchip_in_kernel())) {
        return false;
    }
    return true;
}

bool all_cpu_threads_idle(void)
{
    CPUState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        if (!cpu_thread_is_idle(env)) {
            return false;
        }
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *env)
{
    gdb_set_stop_cpu(env);
    qemu_system_debug_request();
    env->stopped = 1;
}

static void cpu_signal(int sig)
{
    if (cpu_single_env) {
        cpu_exit(cpu_single_env);
    }
    exit_request = 1;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

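/*
 * Added commentary: outside of KVM_RUN the vCPU thread keeps SIG_IPI
 * and SIGBUS blocked, so deliveries accumulate as pending signals.
 * qemu_kvm_eat_signals() drains them with a zero-timeout sigtimedwait(),
 * handling SIGBUS (guest memory errors) synchronously and looping until
 * neither signal is still pending.
 */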
static void qemu_kvm_eat_signals(CPUState *env)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *env)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static int io_thread_fd = -1;

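/*
 * Added commentary: io_thread_fd is the write end of the main loop's
 * wakeup channel (an eventfd, or a pipe where eventfd is unavailable).
 * qemu_event_increment() pokes it so the blocked main-loop poll returns,
 * and qemu_event_read() drains it again.
 */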
static void qemu_event_increment(void)
{
    /* Write 8 bytes to be compatible with eventfd.  */
    static const uint64_t val = 1;
    ssize_t ret;

    if (io_thread_fd == -1) {
        return;
    }
    do {
        ret = write(io_thread_fd, &val, sizeof(val));
    } while (ret < 0 && errno == EINTR);

    /* EAGAIN is fine, a read must be pending.  */
    if (ret < 0 && errno != EAGAIN) {
        fprintf(stderr, "qemu_event_increment: write() failed: %s\n",
                strerror(errno));
        exit(1);
    }
}

static void qemu_event_read(void *opaque)
{
    int fd = (intptr_t)opaque;
    ssize_t len;
    char buffer[512];

    /* Drain the notify pipe.  For eventfd, only 8 bytes will be read.  */
    do {
        len = read(fd, buffer, sizeof(buffer));
    } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
}

static int qemu_event_init(void)
{
    int err;
    int fds[2];

    err = qemu_eventfd(fds);
    if (err == -1) {
        return -errno;
    }
    err = fcntl_setfl(fds[0], O_NONBLOCK);
    if (err < 0) {
        goto fail;
    }
    err = fcntl_setfl(fds[1], O_NONBLOCK);
    if (err < 0) {
        goto fail;
    }
    qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
                         (void *)(intptr_t)fds[0]);

    io_thread_fd = fds[1];
    return 0;

fail:
    close(fds[0]);
    close(fds[1]);
    return err;
}

static void dummy_signal(int sig)
{
}

/* If we have signalfd, we mask out the signals we want to handle and then
 * use signalfd to listen for them.  We rely on whatever the current signal
 * handler is to dispatch the signals when we receive them.
 */
static void sigfd_handler(void *opaque)
{
    int fd = (intptr_t)opaque;
    struct qemu_signalfd_siginfo info;
    struct sigaction action;
    ssize_t len;

    while (1) {
        do {
            len = read(fd, &info, sizeof(info));
        } while (len == -1 && errno == EINTR);

        if (len == -1 && errno == EAGAIN) {
            break;
        }

        if (len != sizeof(info)) {
            printf("read from sigfd returned %zd: %m\n", len);
            return;
        }

        sigaction(info.ssi_signo, NULL, &action);
        if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
            action.sa_sigaction(info.ssi_signo,
                                (siginfo_t *)&info, NULL);
        } else if (action.sa_handler) {
            action.sa_handler(info.ssi_signo);
        }
    }
}

static int qemu_signal_init(void)
{
    int sigfd;
    sigset_t set;

    /*
     * SIG_IPI must be blocked in the main thread and must not be caught
     * by sigwait() in the signal thread.  Otherwise, the cpu thread will
     * not catch it reliably.
     */
    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_BLOCK, &set, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIGIO);
    sigaddset(&set, SIGALRM);
    sigaddset(&set, SIGBUS);
    pthread_sigmask(SIG_BLOCK, &set, NULL);

    sigfd = qemu_signalfd(&set);
    if (sigfd == -1) {
        fprintf(stderr, "failed to create signalfd\n");
        return -errno;
    }

    fcntl_setfl(sigfd, O_NONBLOCK);

    qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
                         (void *)(intptr_t)sigfd);

    return 0;
}

static void qemu_kvm_init_cpu_signals(CPUState *env)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(env, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */

HANDLE qemu_event_handle;

static void dummy_event_handler(void *opaque)
{
}

static int qemu_event_init(void)
{
    qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
    if (!qemu_event_handle) {
        fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError());
        return -1;
    }
    qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
    return 0;
}

static void qemu_event_increment(void)
{
    if (!SetEvent(qemu_event_handle)) {
        fprintf(stderr, "qemu_event_increment: SetEvent failed: %ld\n",
                GetLastError());
        exit(1);
    }
}

static int qemu_signal_init(void)
{
    return 0;
}

static void qemu_kvm_init_cpu_signals(CPUState *env)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */

QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static bool iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

int qemu_init_main_loop(void)
{
    int ret;

    qemu_init_sigbus();

    ret = qemu_signal_init();
    if (ret) {
        return ret;
    }

    /* Note eventfd must be drained before signalfd handlers run */
    ret = qemu_event_init();
    if (ret) {
        return ret;
    }

    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);
    qemu_mutex_lock(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);

    return 0;
}

void qemu_main_loop_start(void)
{
    resume_all_vcpus();
}

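/*
 * Added commentary: run_on_cpu() executes func(data) in the context of
 * a given vCPU.  If called from that vCPU's own thread it just runs the
 * function; otherwise it queues a qemu_work_item on the target, kicks
 * the vCPU out of its execution loop, and blocks on qemu_work_cond
 * until the item is marked done.  The work item lives on the caller's
 * stack, which is safe precisely because the caller waits for it.
 */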
void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(env)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    if (!env->queued_work_first) {
        env->queued_work_first = &wi;
    } else {
        env->queued_work_last->next = &wi;
    }
    env->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(env);
    while (!wi.done) {
        CPUState *self_env = cpu_single_env;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        cpu_single_env = self_env;
    }
}

static void flush_queued_work(CPUState *env)
{
    struct qemu_work_item *wi;

    if (!env->queued_work_first) {
        return;
    }

    while ((wi = env->queued_work_first)) {
        env->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
    }
    env->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *env)
{
    if (env->stop) {
        env->stop = 0;
        env->stopped = 1;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(env);
    env->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(void)
{
    CPUState *env;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle.  */
        qemu_clock_warp(vm_clock);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        qemu_wait_io_event_common(env);
    }
}

static void qemu_kvm_wait_io_event(CPUState *env)
{
    while (cpu_thread_is_idle(env)) {
        qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(env);
    qemu_wait_io_event_common(env);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *env = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(env->thread);
    env->thread_id = qemu_get_thread_id();

    r = kvm_init_vcpu(env);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(env);

    /* signal CPU creation */
    env->created = 1;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(env)) {
            r = kvm_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(env);
            }
        }
        qemu_kvm_wait_io_event(env);
    }

    return NULL;
}

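/*
 * Added commentary on the threading model: KVM gives every vCPU its own
 * thread (qemu_kvm_cpu_thread_fn above), whereas TCG multiplexes all
 * vCPUs onto the single thread below, which round-robins them via
 * cpu_exec_all().  Both run under qemu_global_mutex and give it up only
 * while sleeping on their halt condition variable.
 */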
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *env = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(env->thread);

    /* signal CPU creation */
    qemu_mutex_lock(&qemu_global_mutex);
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        env->thread_id = qemu_get_thread_id();
        env->created = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (1) {
        cpu_exec_all();
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
            qemu_notify_event();
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}

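/*
 * Added commentary: qemu_cpu_kick_thread() forces a vCPU thread out of
 * guest code.  On POSIX hosts it sends SIG_IPI, whose delivery (via
 * cpu_signal() for TCG, or the signal-driven exit from KVM_RUN for KVM)
 * makes the execution loop return.  Windows has no thread signals, so
 * the TCG thread is briefly suspended and cpu_signal() is invoked on
 * its behalf instead.
 */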
static void qemu_cpu_kick_thread(CPUState *env)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(env->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(env)) {
        SuspendThread(env->thread->thread);
        cpu_signal(0);
        ResumeThread(env->thread->thread);
    }
#endif
}

void qemu_cpu_kick(void *_env)
{
    CPUState *env = _env;

    qemu_cond_broadcast(env->halt_cond);
    if (kvm_enabled() && !env->thread_kicked) {
        qemu_cpu_kick_thread(env);
        env->thread_kicked = true;
    }
}

void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(cpu_single_env);

    if (!cpu_single_env->thread_kicked) {
        qemu_cpu_kick_thread(cpu_single_env);
        cpu_single_env->thread_kicked = true;
    }
#else
    abort();
#endif
}

int qemu_cpu_is_self(void *_env)
{
    CPUState *env = _env;

    return qemu_thread_is_self(env->thread);
}

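/*
 * Added commentary: with TCG, the vCPU thread can hold qemu_global_mutex
 * for long stretches while running guest code.  If the I/O thread cannot
 * take the lock immediately, it sets iothread_requesting_mutex and kicks
 * the TCG thread; the TCG thread notices, parks in
 * qemu_tcg_wait_io_event() on qemu_io_proceeded_cond, and thereby hands
 * the mutex to the I/O thread.  KVM drops the lock around KVM_RUN
 * anyway, so a plain lock suffices there.
 */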
void qemu_mutex_lock_iothread(void)
{
    if (kvm_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}

void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUState *penv = first_cpu;

    while (penv) {
        if (!penv->stopped) {
            return 0;
        }
        penv = (CPUState *)penv->next_cpu;
    }

    return 1;
}

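/*
 * Added commentary: pause_all_vcpus() flags every vCPU with stop = 1 and
 * kicks it; each thread acknowledges in qemu_wait_io_event_common() by
 * setting stopped = 1 and signalling qemu_pause_cond.  The loop below
 * re-kicks after every wakeup until all_vcpus_paused() holds, which
 * guards against a kick racing with a vCPU that was just going back to
 * sleep.
 */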
void pause_all_vcpus(void)
{
    CPUState *penv = first_cpu;

    while (penv) {
        penv->stop = 1;
        qemu_cpu_kick(penv);
        penv = (CPUState *)penv->next_cpu;
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        penv = first_cpu;
        while (penv) {
            qemu_cpu_kick(penv);
            penv = (CPUState *)penv->next_cpu;
        }
    }
}

void resume_all_vcpus(void)
{
    CPUState *penv = first_cpu;

    while (penv) {
        penv->stop = 0;
        penv->stopped = 0;
        qemu_cpu_kick(penv);
        penv = (CPUState *)penv->next_cpu;
    }
}

static void qemu_tcg_init_vcpu(void *_env)
{
    CPUState *env = _env;

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        env->thread = g_malloc0(sizeof(QemuThread));
        env->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(env->halt_cond);
        tcg_halt_cond = env->halt_cond;
        qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env);
        while (env->created == 0) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = env->thread;
    } else {
        env->thread = tcg_cpu_thread;
        env->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *env)
{
    env->thread = g_malloc0(sizeof(QemuThread));
    env->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(env->halt_cond);
    qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env);
    while (env->created == 0) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(void *_env)
{
    CPUState *env = _env;

    env->nr_cores = smp_cores;
    env->nr_threads = smp_threads;
    env->stopped = 1;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(env);
    } else {
        qemu_tcg_init_vcpu(env);
    }
}

void qemu_notify_event(void)
{
    qemu_event_increment();
}

void cpu_stop_current(void)
{
    if (cpu_single_env) {
        cpu_single_env->stop = 0;
        cpu_single_env->stopped = 1;
        cpu_exit(cpu_single_env);
        qemu_cond_signal(&qemu_pause_cond);
    }
}

void vm_stop(RunState state)
{
    if (!qemu_thread_is_self(&io_thread)) {
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return;
    }
    do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
void vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        vm_stop(state);
    } else {
        runstate_set(state);
    }
}

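/*
 * Added commentary: in icount mode, tcg_cpu_exec() gives the vCPU an
 * instruction budget just large enough to reach the next vm_clock
 * deadline.  The low 16 bits of the budget go into icount_decr.u16.low
 * (which the translated code decrements); any excess is parked in
 * icount_extra.  After cpu_exec() returns, the unused budget is folded
 * back into qemu_icount so virtual time advances only by instructions
 * actually executed.
 */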
static int tcg_cpu_exec(CPUState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;
        count = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        env->icount_decr.u16.low = decr;
        env->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag.  */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }
    return ret;
}

bool cpu_exec_all(void)
{
    int r;

    /* Account partial waits to the vm_clock.  */
    qemu_clock_warp(vm_clock);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
        CPUState *env = next_cpu;

        qemu_clock_enable(vm_clock,
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(env)) {
            if (kvm_enabled()) {
                r = kvm_cpu_exec(env);
                qemu_kvm_eat_signals(env);
            } else {
                r = tcg_cpu_exec(env);
            }
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(env);
                break;
            }
        } else if (env->stop || env->stopped) {
            break;
        }
    }
    exit_request = 0;
    return !all_cpu_threads_idle();
}

void set_numa_modes(void)
{
    CPUState *env;
    int i;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        for (i = 0; i < nb_numa_nodes; i++) {
            if (node_cpumask[i] & (1 << env->cpu_index)) {
                env->numa_node = i;
            }
        }
    }
}

void set_cpu_log(const char *optarg)
{
    int mask;
    const CPULogItem *item;

    mask = cpu_str_to_log_mask(optarg);
    if (!mask) {
        printf("Log items (comma separated):\n");
        for (item = cpu_log_items; item->mask != 0; item++) {
            printf("%-10s %s\n", item->name, item->help);
        }
        exit(1);
    }
    cpu_set_log(mask);
}

void set_cpu_log_filename(const char *optarg)
{
    cpu_set_log_filename(optarg);
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list_id)
    cpu_list_id(f, cpu_fprintf, optarg);
#elif defined(cpu_list)
    cpu_list(f, cpu_fprintf); /* deprecated */
#endif
}