[qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
27
28 #include "monitor.h"
29 #include "sysemu.h"
30 #include "gdbstub.h"
31 #include "dma.h"
32 #include "kvm.h"
33
34 #include "qemu-thread.h"
35 #include "cpus.h"
36
37 #ifndef _WIN32
38 #include "compatfd.h"
39 #endif
40
41 #ifdef SIGRTMIN
42 #define SIG_IPI (SIGRTMIN+4)
43 #else
44 #define SIG_IPI SIGUSR1
45 #endif
46
47 #ifdef CONFIG_LINUX
48
49 #include <sys/prctl.h>
50
51 #ifndef PR_MCE_KILL
52 #define PR_MCE_KILL 33
53 #endif
54
55 #ifndef PR_MCE_KILL_SET
56 #define PR_MCE_KILL_SET 1
57 #endif
58
59 #ifndef PR_MCE_KILL_EARLY
60 #define PR_MCE_KILL_EARLY 1
61 #endif
62
63 #endif /* CONFIG_LINUX */
64
65 static CPUState *next_cpu;
66
67 /***********************************************************/
68 /* guest cycle counter */
69
70 /* Conversion factor from emulated instructions to virtual clock ticks. */
71 static int icount_time_shift;
72 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
73 #define MAX_ICOUNT_SHIFT 10
74 /* Compensate for varying guest execution speed. */
75 static int64_t qemu_icount_bias;
76 static QEMUTimer *icount_rt_timer;
77 static QEMUTimer *icount_vm_timer;
78 static QEMUTimer *icount_warp_timer;
79 static int64_t vm_clock_warp_start;
80 static int64_t qemu_icount;
81
82 typedef struct TimersState {
83 int64_t cpu_ticks_prev;
84 int64_t cpu_ticks_offset;
85 int64_t cpu_clock_offset;
86 int32_t cpu_ticks_enabled;
87 int64_t dummy;
88 } TimersState;
89
90 TimersState timers_state;
91
92 /* Return the virtual CPU time, based on the instruction counter. */
93 int64_t cpu_get_icount(void)
94 {
95 int64_t icount;
96     CPUState *env = cpu_single_env;
97
98 icount = qemu_icount;
99 if (env) {
100 if (!can_do_io(env)) {
101 fprintf(stderr, "Bad clock read\n");
102 }
103 icount -= (env->icount_decr.u16.low + env->icount_extra);
104 }
105 return qemu_icount_bias + (icount << icount_time_shift);
106 }
107
108 /* return the host CPU cycle counter and handle stop/restart */
109 int64_t cpu_get_ticks(void)
110 {
111 if (use_icount) {
112 return cpu_get_icount();
113 }
114 if (!timers_state.cpu_ticks_enabled) {
115 return timers_state.cpu_ticks_offset;
116 } else {
117 int64_t ticks;
118 ticks = cpu_get_real_ticks();
119 if (timers_state.cpu_ticks_prev > ticks) {
120             /* Note: non-increasing ticks may happen if the host uses
121                software suspend */
122 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
123 }
124 timers_state.cpu_ticks_prev = ticks;
125 return ticks + timers_state.cpu_ticks_offset;
126 }
127 }
128
129 /* return the host CPU monotonic timer and handle stop/restart */
130 int64_t cpu_get_clock(void)
131 {
132 int64_t ti;
133 if (!timers_state.cpu_ticks_enabled) {
134 return timers_state.cpu_clock_offset;
135 } else {
136 ti = get_clock();
137 return ti + timers_state.cpu_clock_offset;
138 }
139 }
140
141 /* enable cpu_get_ticks() */
142 void cpu_enable_ticks(void)
143 {
144 if (!timers_state.cpu_ticks_enabled) {
145 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
146 timers_state.cpu_clock_offset -= get_clock();
147 timers_state.cpu_ticks_enabled = 1;
148 }
149 }
150
151 /* disable cpu_get_ticks(): the clock is stopped. You must not call
152 cpu_get_ticks() after that. */
153 void cpu_disable_ticks(void)
154 {
155 if (timers_state.cpu_ticks_enabled) {
156 timers_state.cpu_ticks_offset = cpu_get_ticks();
157 timers_state.cpu_clock_offset = cpu_get_clock();
158 timers_state.cpu_ticks_enabled = 0;
159 }
160 }
161
162 /* Correlation between real and virtual time is always going to be
163 fairly approximate, so ignore small variation.
164 When the guest is idle real and virtual time will be aligned in
165 the IO wait loop. */
166 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
167
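/* Re-tune icount_time_shift so that the instruction-counter based virtual
   clock keeps tracking real time.  Called periodically from the rt_clock and
   vm_clock adjustment timers set up in configure_icount(). */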
168 static void icount_adjust(void)
169 {
170 int64_t cur_time;
171 int64_t cur_icount;
172 int64_t delta;
173 static int64_t last_delta;
174 /* If the VM is not running, then do nothing. */
175 if (!runstate_is_running()) {
176 return;
177 }
178 cur_time = cpu_get_clock();
179 cur_icount = qemu_get_clock_ns(vm_clock);
180 delta = cur_icount - cur_time;
181 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
182 if (delta > 0
183 && last_delta + ICOUNT_WOBBLE < delta * 2
184 && icount_time_shift > 0) {
185 /* The guest is getting too far ahead. Slow time down. */
186 icount_time_shift--;
187 }
188 if (delta < 0
189 && last_delta - ICOUNT_WOBBLE > delta * 2
190 && icount_time_shift < MAX_ICOUNT_SHIFT) {
191 /* The guest is getting too far behind. Speed time up. */
192 icount_time_shift++;
193 }
194 last_delta = delta;
195 qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
196 }
197
198 static void icount_adjust_rt(void *opaque)
199 {
200 qemu_mod_timer(icount_rt_timer,
201 qemu_get_clock_ms(rt_clock) + 1000);
202 icount_adjust();
203 }
204
205 static void icount_adjust_vm(void *opaque)
206 {
207 qemu_mod_timer(icount_vm_timer,
208 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
209 icount_adjust();
210 }
211
212 static int64_t qemu_icount_round(int64_t count)
213 {
214 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
215 }
216
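/* Callback of icount_warp_timer (also called directly from qemu_clock_warp):
   add the real time that has passed since vm_clock_warp_start to
   qemu_icount_bias (capped in adaptive icount mode), so that vm_clock
   catches up after an idle period. */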
217 static void icount_warp_rt(void *opaque)
218 {
219 if (vm_clock_warp_start == -1) {
220 return;
221 }
222
223 if (runstate_is_running()) {
224 int64_t clock = qemu_get_clock_ns(rt_clock);
225 int64_t warp_delta = clock - vm_clock_warp_start;
226 if (use_icount == 1) {
227 qemu_icount_bias += warp_delta;
228 } else {
229 /*
230 * In adaptive mode, do not let the vm_clock run too
231 * far ahead of real time.
232 */
233 int64_t cur_time = cpu_get_clock();
234 int64_t cur_icount = qemu_get_clock_ns(vm_clock);
235 int64_t delta = cur_time - cur_icount;
236 qemu_icount_bias += MIN(warp_delta, delta);
237 }
238 if (qemu_clock_expired(vm_clock)) {
239 qemu_notify_event();
240 }
241 }
242 vm_clock_warp_start = -1;
243 }
244
245 void qemu_clock_warp(QEMUClock *clock)
246 {
247 int64_t deadline;
248
249 /*
250 * There are too many global variables to make the "warp" behavior
251 * applicable to other clocks. But a clock argument removes the
252 * need for if statements all over the place.
253 */
254 if (clock != vm_clock || !use_icount) {
255 return;
256 }
257
258 /*
259 * If the CPUs have been sleeping, advance the vm_clock timer now. This
260 * ensures that the deadline for the timer is computed correctly below.
261 * This also makes sure that the insn counter is synchronized before the
262 * CPU starts running, in case the CPU is woken by an event other than
263 * the earliest vm_clock timer.
264 */
265 icount_warp_rt(NULL);
266 if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
267 qemu_del_timer(icount_warp_timer);
268 return;
269 }
270
271 vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
272 deadline = qemu_clock_deadline(vm_clock);
273 if (deadline > 0) {
274 /*
275 * Ensure the vm_clock proceeds even when the virtual CPU goes to
276 * sleep. Otherwise, the CPU might be waiting for a future timer
277 * interrupt to wake it up, but the interrupt never comes because
278 * the vCPU isn't running any insns and thus doesn't advance the
279 * vm_clock.
280 *
281          * An extreme solution for this problem would be to never let VCPUs
282          * sleep in icount mode if there is a pending vm_clock timer; rather
283          * time could just advance to the next vm_clock event. Instead, we
284          * do stop VCPUs and only advance vm_clock after some "real" time
285          * (related to the time left until the next event) has passed. The
286          * rt_clock timer set up below does this. This keeps the warps from
287          * being too visible externally---for example, you will not be sending
288          * network packets continuously instead of every 100ms.
289 */
290 qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
291 } else {
292 qemu_notify_event();
293 }
294 }
295
296 static const VMStateDescription vmstate_timers = {
297 .name = "timer",
298 .version_id = 2,
299 .minimum_version_id = 1,
300 .minimum_version_id_old = 1,
301 .fields = (VMStateField[]) {
302 VMSTATE_INT64(cpu_ticks_offset, TimersState),
303 VMSTATE_INT64(dummy, TimersState),
304 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
305 VMSTATE_END_OF_LIST()
306 }
307 };
308
309 void configure_icount(const char *option)
310 {
311 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
312 if (!option) {
313 return;
314 }
315
316 icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
317 if (strcmp(option, "auto") != 0) {
318 icount_time_shift = strtol(option, NULL, 0);
319 use_icount = 1;
320 return;
321 }
322
323 use_icount = 2;
324
325 /* 125MIPS seems a reasonable initial guess at the guest speed.
326 It will be corrected fairly quickly anyway. */
327 icount_time_shift = 3;
328
329 /* Have both realtime and virtual time triggers for speed adjustment.
330 The realtime trigger catches emulated time passing too slowly,
331 the virtual time trigger catches emulated time passing too fast.
332 Realtime triggers occur even when idle, so use them less frequently
333 than VM triggers. */
334 icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
335 qemu_mod_timer(icount_rt_timer,
336 qemu_get_clock_ms(rt_clock) + 1000);
337 icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
338 qemu_mod_timer(icount_vm_timer,
339 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
340 }
341
342 /***********************************************************/
343 void hw_error(const char *fmt, ...)
344 {
345 va_list ap;
346 CPUState *env;
347
348 va_start(ap, fmt);
349 fprintf(stderr, "qemu: hardware error: ");
350 vfprintf(stderr, fmt, ap);
351 fprintf(stderr, "\n");
352     for (env = first_cpu; env != NULL; env = env->next_cpu) {
353 fprintf(stderr, "CPU #%d:\n", env->cpu_index);
354 #ifdef TARGET_I386
355 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
356 #else
357 cpu_dump_state(env, stderr, fprintf, 0);
358 #endif
359 }
360 va_end(ap);
361 abort();
362 }
363
364 void cpu_synchronize_all_states(void)
365 {
366 CPUState *cpu;
367
368 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
369 cpu_synchronize_state(cpu);
370 }
371 }
372
373 void cpu_synchronize_all_post_reset(void)
374 {
375 CPUState *cpu;
376
377 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
378 cpu_synchronize_post_reset(cpu);
379 }
380 }
381
382 void cpu_synchronize_all_post_init(void)
383 {
384 CPUState *cpu;
385
386 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
387 cpu_synchronize_post_init(cpu);
388 }
389 }
390
391 int cpu_is_stopped(CPUState *env)
392 {
393 return !runstate_is_running() || env->stopped;
394 }
395
396 static void do_vm_stop(RunState state)
397 {
398 if (runstate_is_running()) {
399 cpu_disable_ticks();
400 pause_all_vcpus();
401 runstate_set(state);
402 vm_state_notify(0, state);
403 qemu_aio_flush();
404 bdrv_flush_all();
405 monitor_protocol_event(QEVENT_STOP, NULL);
406 }
407 }
408
409 static int cpu_can_run(CPUState *env)
410 {
411 if (env->stop) {
412 return 0;
413 }
414 if (env->stopped || !runstate_is_running()) {
415 return 0;
416 }
417 return 1;
418 }
419
420 static bool cpu_thread_is_idle(CPUState *env)
421 {
422 if (env->stop || env->queued_work_first) {
423 return false;
424 }
425 if (env->stopped || !runstate_is_running()) {
426 return true;
427 }
428 if (!env->halted || qemu_cpu_has_work(env) ||
429 (kvm_enabled() && kvm_irqchip_in_kernel())) {
430 return false;
431 }
432 return true;
433 }
434
435 bool all_cpu_threads_idle(void)
436 {
437 CPUState *env;
438
439 for (env = first_cpu; env != NULL; env = env->next_cpu) {
440 if (!cpu_thread_is_idle(env)) {
441 return false;
442 }
443 }
444 return true;
445 }
446
447 static void cpu_handle_guest_debug(CPUState *env)
448 {
449 gdb_set_stop_cpu(env);
450 qemu_system_debug_request();
451 env->stopped = 1;
452 }
453
454 static void cpu_signal(int sig)
455 {
456 if (cpu_single_env) {
457 cpu_exit(cpu_single_env);
458 }
459 exit_request = 1;
460 }
461
462 #ifdef CONFIG_LINUX
463 static void sigbus_reraise(void)
464 {
465 sigset_t set;
466 struct sigaction action;
467
468 memset(&action, 0, sizeof(action));
469 action.sa_handler = SIG_DFL;
470 if (!sigaction(SIGBUS, &action, NULL)) {
471 raise(SIGBUS);
472 sigemptyset(&set);
473 sigaddset(&set, SIGBUS);
474 sigprocmask(SIG_UNBLOCK, &set, NULL);
475 }
476     perror("Failed to re-raise SIGBUS!");
477 abort();
478 }
479
480 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
481 void *ctx)
482 {
483 if (kvm_on_sigbus(siginfo->ssi_code,
484 (void *)(intptr_t)siginfo->ssi_addr)) {
485 sigbus_reraise();
486 }
487 }
488
489 static void qemu_init_sigbus(void)
490 {
491 struct sigaction action;
492
493 memset(&action, 0, sizeof(action));
494 action.sa_flags = SA_SIGINFO;
495 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
496 sigaction(SIGBUS, &action, NULL);
497
498 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
499 }
500
501 static void qemu_kvm_eat_signals(CPUState *env)
502 {
503 struct timespec ts = { 0, 0 };
504 siginfo_t siginfo;
505 sigset_t waitset;
506 sigset_t chkset;
507 int r;
508
509 sigemptyset(&waitset);
510 sigaddset(&waitset, SIG_IPI);
511 sigaddset(&waitset, SIGBUS);
512
513 do {
514 r = sigtimedwait(&waitset, &siginfo, &ts);
515 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
516 perror("sigtimedwait");
517 exit(1);
518 }
519
520 switch (r) {
521 case SIGBUS:
522 if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
523 sigbus_reraise();
524 }
525 break;
526 default:
527 break;
528 }
529
530 r = sigpending(&chkset);
531 if (r == -1) {
532 perror("sigpending");
533 exit(1);
534 }
535 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
536 }
537
538 #else /* !CONFIG_LINUX */
539
540 static void qemu_init_sigbus(void)
541 {
542 }
543
544 static void qemu_kvm_eat_signals(CPUState *env)
545 {
546 }
547 #endif /* !CONFIG_LINUX */
548
549 #ifndef _WIN32
550 static int io_thread_fd = -1;
551
552 static void qemu_event_increment(void)
553 {
554 /* Write 8 bytes to be compatible with eventfd. */
555 static const uint64_t val = 1;
556 ssize_t ret;
557
558 if (io_thread_fd == -1) {
559 return;
560 }
561 do {
562 ret = write(io_thread_fd, &val, sizeof(val));
563 } while (ret < 0 && errno == EINTR);
564
565 /* EAGAIN is fine, a read must be pending. */
566 if (ret < 0 && errno != EAGAIN) {
567 fprintf(stderr, "qemu_event_increment: write() failed: %s\n",
568 strerror(errno));
569 exit (1);
570 }
571 }
572
573 static void qemu_event_read(void *opaque)
574 {
575 int fd = (intptr_t)opaque;
576 ssize_t len;
577 char buffer[512];
578
579 /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */
580 do {
581 len = read(fd, buffer, sizeof(buffer));
582 } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
583 }
584
585 static int qemu_event_init(void)
586 {
587 int err;
588 int fds[2];
589
590 err = qemu_eventfd(fds);
591 if (err == -1) {
592 return -errno;
593 }
594 err = fcntl_setfl(fds[0], O_NONBLOCK);
595 if (err < 0) {
596 goto fail;
597 }
598 err = fcntl_setfl(fds[1], O_NONBLOCK);
599 if (err < 0) {
600 goto fail;
601 }
602 qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
603 (void *)(intptr_t)fds[0]);
604
605 io_thread_fd = fds[1];
606 return 0;
607
608 fail:
609 close(fds[0]);
610 close(fds[1]);
611 return err;
612 }
613
614 static void dummy_signal(int sig)
615 {
616 }
617
618 /* If we have signalfd, we mask out the signals we want to handle and then
619 * use signalfd to listen for them. We rely on whatever the current signal
620 * handler is to dispatch the signals when we receive them.
621 */
622 static void sigfd_handler(void *opaque)
623 {
624 int fd = (intptr_t)opaque;
625 struct qemu_signalfd_siginfo info;
626 struct sigaction action;
627 ssize_t len;
628
629 while (1) {
630 do {
631 len = read(fd, &info, sizeof(info));
632 } while (len == -1 && errno == EINTR);
633
634 if (len == -1 && errno == EAGAIN) {
635 break;
636 }
637
638 if (len != sizeof(info)) {
639 printf("read from sigfd returned %zd: %m\n", len);
640 return;
641 }
642
643 sigaction(info.ssi_signo, NULL, &action);
644 if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
645 action.sa_sigaction(info.ssi_signo,
646 (siginfo_t *)&info, NULL);
647 } else if (action.sa_handler) {
648 action.sa_handler(info.ssi_signo);
649 }
650 }
651 }
652
653 static int qemu_signal_init(void)
654 {
655 int sigfd;
656 sigset_t set;
657
658 /*
659 * SIG_IPI must be blocked in the main thread and must not be caught
660 * by sigwait() in the signal thread. Otherwise, the cpu thread will
661 * not catch it reliably.
662 */
663 sigemptyset(&set);
664 sigaddset(&set, SIG_IPI);
665 pthread_sigmask(SIG_BLOCK, &set, NULL);
666
667 sigemptyset(&set);
668 sigaddset(&set, SIGIO);
669 sigaddset(&set, SIGALRM);
670 sigaddset(&set, SIGBUS);
671 pthread_sigmask(SIG_BLOCK, &set, NULL);
672
673 sigfd = qemu_signalfd(&set);
674 if (sigfd == -1) {
675 fprintf(stderr, "failed to create signalfd\n");
676 return -errno;
677 }
678
679 fcntl_setfl(sigfd, O_NONBLOCK);
680
681 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
682 (void *)(intptr_t)sigfd);
683
684 return 0;
685 }
686
687 static void qemu_kvm_init_cpu_signals(CPUState *env)
688 {
689 int r;
690 sigset_t set;
691 struct sigaction sigact;
692
693 memset(&sigact, 0, sizeof(sigact));
694 sigact.sa_handler = dummy_signal;
695 sigaction(SIG_IPI, &sigact, NULL);
696
697 pthread_sigmask(SIG_BLOCK, NULL, &set);
698 sigdelset(&set, SIG_IPI);
699 sigdelset(&set, SIGBUS);
700 r = kvm_set_signal_mask(env, &set);
701 if (r) {
702 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
703 exit(1);
704 }
713 }
714
715 static void qemu_tcg_init_cpu_signals(void)
716 {
717 sigset_t set;
718 struct sigaction sigact;
719
720 memset(&sigact, 0, sizeof(sigact));
721 sigact.sa_handler = cpu_signal;
722 sigaction(SIG_IPI, &sigact, NULL);
723
724 sigemptyset(&set);
725 sigaddset(&set, SIG_IPI);
726 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
727 }
728
729 #else /* _WIN32 */
730
731 HANDLE qemu_event_handle;
732
733 static void dummy_event_handler(void *opaque)
734 {
735 }
736
737 static int qemu_event_init(void)
738 {
739 qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
740 if (!qemu_event_handle) {
741 fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError());
742 return -1;
743 }
744 qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
745 return 0;
746 }
747
748 static void qemu_event_increment(void)
749 {
750 if (!SetEvent(qemu_event_handle)) {
751 fprintf(stderr, "qemu_event_increment: SetEvent failed: %ld\n",
752 GetLastError());
753 exit (1);
754 }
755 }
756
757 static int qemu_signal_init(void)
758 {
759 return 0;
760 }
761
762 static void qemu_kvm_init_cpu_signals(CPUState *env)
763 {
764 abort();
765 }
766
767 static void qemu_tcg_init_cpu_signals(void)
768 {
769 }
770 #endif /* _WIN32 */
771
772 QemuMutex qemu_global_mutex;
773 static QemuCond qemu_io_proceeded_cond;
774 static bool iothread_requesting_mutex;
775
776 static QemuThread io_thread;
777
778 static QemuThread *tcg_cpu_thread;
779 static QemuCond *tcg_halt_cond;
780
781 /* cpu creation */
782 static QemuCond qemu_cpu_cond;
783 /* system init */
784 static QemuCond qemu_pause_cond;
785 static QemuCond qemu_work_cond;
786
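/* Set up signal handling and the I/O thread event notification mechanism,
   then initialize the condition variables and the global mutex, which is
   taken by the calling (I/O) thread. */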
787 int qemu_init_main_loop(void)
788 {
789 int ret;
790
791 qemu_init_sigbus();
792
793 ret = qemu_signal_init();
794 if (ret) {
795 return ret;
796 }
797
798 /* Note eventfd must be drained before signalfd handlers run */
799 ret = qemu_event_init();
800 if (ret) {
801 return ret;
802 }
803
804 qemu_cond_init(&qemu_cpu_cond);
805 qemu_cond_init(&qemu_pause_cond);
806 qemu_cond_init(&qemu_work_cond);
807 qemu_cond_init(&qemu_io_proceeded_cond);
808 qemu_mutex_init(&qemu_global_mutex);
809 qemu_mutex_lock(&qemu_global_mutex);
810
811 qemu_thread_get_self(&io_thread);
812
813 return 0;
814 }
815
816 void qemu_main_loop_start(void)
817 {
818 resume_all_vcpus();
819 }
820
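/* Run func(data) on the thread that owns env.  When called from another
   thread, queue a work item, kick the CPU and wait until the item has been
   processed by flush_queued_work(). */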
821 void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
822 {
823 struct qemu_work_item wi;
824
825 if (qemu_cpu_is_self(env)) {
826 func(data);
827 return;
828 }
829
830 wi.func = func;
831 wi.data = data;
832 if (!env->queued_work_first) {
833 env->queued_work_first = &wi;
834 } else {
835 env->queued_work_last->next = &wi;
836 }
837 env->queued_work_last = &wi;
838 wi.next = NULL;
839 wi.done = false;
840
841 qemu_cpu_kick(env);
842 while (!wi.done) {
843 CPUState *self_env = cpu_single_env;
844
845 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
846 cpu_single_env = self_env;
847 }
848 }
849
850 static void flush_queued_work(CPUState *env)
851 {
852 struct qemu_work_item *wi;
853
854 if (!env->queued_work_first) {
855 return;
856 }
857
858 while ((wi = env->queued_work_first)) {
859 env->queued_work_first = wi->next;
860 wi->func(wi->data);
861 wi->done = true;
862 }
863 env->queued_work_last = NULL;
864 qemu_cond_broadcast(&qemu_work_cond);
865 }
866
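/* Housekeeping done by every CPU thread after waking up: acknowledge a
   pending stop request and run any queued work items. */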
867 static void qemu_wait_io_event_common(CPUState *env)
868 {
869 if (env->stop) {
870 env->stop = 0;
871 env->stopped = 1;
872 qemu_cond_signal(&qemu_pause_cond);
873 }
874 flush_queued_work(env);
875 env->thread_kicked = false;
876 }
877
878 static void qemu_tcg_wait_io_event(void)
879 {
880 CPUState *env;
881
882 while (all_cpu_threads_idle()) {
883 /* Start accounting real time to the virtual clock if the CPUs
884 are idle. */
885 qemu_clock_warp(vm_clock);
886 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
887 }
888
889 while (iothread_requesting_mutex) {
890 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
891 }
892
893 for (env = first_cpu; env != NULL; env = env->next_cpu) {
894 qemu_wait_io_event_common(env);
895 }
896 }
897
898 static void qemu_kvm_wait_io_event(CPUState *env)
899 {
900 while (cpu_thread_is_idle(env)) {
901 qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
902 }
903
904 qemu_kvm_eat_signals(env);
905 qemu_wait_io_event_common(env);
906 }
907
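/* Per-VCPU thread used with KVM: initialize the VCPU and its signal mask,
   then alternate between kvm_cpu_exec() and waiting for I/O events. */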
908 static void *qemu_kvm_cpu_thread_fn(void *arg)
909 {
910 CPUState *env = arg;
911 int r;
912
913 qemu_mutex_lock(&qemu_global_mutex);
914 qemu_thread_get_self(env->thread);
915 env->thread_id = qemu_get_thread_id();
916
917 r = kvm_init_vcpu(env);
918 if (r < 0) {
919 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
920 exit(1);
921 }
922
923 qemu_kvm_init_cpu_signals(env);
924
925 /* signal CPU creation */
926 env->created = 1;
927 qemu_cond_signal(&qemu_cpu_cond);
928
929 while (1) {
930 if (cpu_can_run(env)) {
931 r = kvm_cpu_exec(env);
932 if (r == EXCP_DEBUG) {
933 cpu_handle_guest_debug(env);
934 }
935 }
936 qemu_kvm_wait_io_event(env);
937 }
938
939 return NULL;
940 }
941
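/* Single thread used with TCG: all VCPUs are multiplexed onto it by
   cpu_exec_all() in round-robin fashion. */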
942 static void *qemu_tcg_cpu_thread_fn(void *arg)
943 {
944 CPUState *env = arg;
945
946 qemu_tcg_init_cpu_signals();
947 qemu_thread_get_self(env->thread);
948
949 /* signal CPU creation */
950 qemu_mutex_lock(&qemu_global_mutex);
951 for (env = first_cpu; env != NULL; env = env->next_cpu) {
952 env->thread_id = qemu_get_thread_id();
953 env->created = 1;
954 }
955 qemu_cond_signal(&qemu_cpu_cond);
956
957 /* wait for initial kick-off after machine start */
958 while (first_cpu->stopped) {
959 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
960 }
961
962 while (1) {
963 cpu_exec_all();
964 if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
965 qemu_notify_event();
966 }
967 qemu_tcg_wait_io_event();
968 }
969
970 return NULL;
971 }
972
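/* Force the thread running env out of the execution loop: send SIG_IPI on
   POSIX hosts, or suspend the thread and raise the exit request via
   cpu_signal() on Windows. */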
973 static void qemu_cpu_kick_thread(CPUState *env)
974 {
975 #ifndef _WIN32
976 int err;
977
978 err = pthread_kill(env->thread->thread, SIG_IPI);
979 if (err) {
980 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
981 exit(1);
982 }
983 #else /* _WIN32 */
984 if (!qemu_cpu_is_self(env)) {
985 SuspendThread(env->thread->thread);
986 cpu_signal(0);
987 ResumeThread(env->thread->thread);
988 }
989 #endif
990 }
991
992 void qemu_cpu_kick(void *_env)
993 {
994 CPUState *env = _env;
995
996 qemu_cond_broadcast(env->halt_cond);
997 if (kvm_enabled() && !env->thread_kicked) {
998 qemu_cpu_kick_thread(env);
999 env->thread_kicked = true;
1000 }
1001 }
1002
1003 void qemu_cpu_kick_self(void)
1004 {
1005 #ifndef _WIN32
1006 assert(cpu_single_env);
1007
1008 if (!cpu_single_env->thread_kicked) {
1009 qemu_cpu_kick_thread(cpu_single_env);
1010 cpu_single_env->thread_kicked = true;
1011 }
1012 #else
1013 abort();
1014 #endif
1015 }
1016
1017 int qemu_cpu_is_self(void *_env)
1018 {
1019 CPUState *env = _env;
1020
1021 return qemu_thread_is_self(env->thread);
1022 }
1023
1024 void qemu_mutex_lock_iothread(void)
1025 {
1026 if (kvm_enabled()) {
1027 qemu_mutex_lock(&qemu_global_mutex);
1028 } else {
1029 iothread_requesting_mutex = true;
1030 if (qemu_mutex_trylock(&qemu_global_mutex)) {
1031 qemu_cpu_kick_thread(first_cpu);
1032 qemu_mutex_lock(&qemu_global_mutex);
1033 }
1034 iothread_requesting_mutex = false;
1035 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1036 }
1037 }
1038
1039 void qemu_mutex_unlock_iothread(void)
1040 {
1041 qemu_mutex_unlock(&qemu_global_mutex);
1042 }
1043
1044 static int all_vcpus_paused(void)
1045 {
1046 CPUState *penv = first_cpu;
1047
1048 while (penv) {
1049 if (!penv->stopped) {
1050 return 0;
1051 }
1052 penv = (CPUState *)penv->next_cpu;
1053 }
1054
1055 return 1;
1056 }
1057
1058 void pause_all_vcpus(void)
1059 {
1060 CPUState *penv = first_cpu;
1061
1062 while (penv) {
1063 penv->stop = 1;
1064 qemu_cpu_kick(penv);
1065 penv = (CPUState *)penv->next_cpu;
1066 }
1067
1068 while (!all_vcpus_paused()) {
1069 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1070 penv = first_cpu;
1071 while (penv) {
1072 qemu_cpu_kick(penv);
1073 penv = (CPUState *)penv->next_cpu;
1074 }
1075 }
1076 }
1077
1078 void resume_all_vcpus(void)
1079 {
1080 CPUState *penv = first_cpu;
1081
1082 while (penv) {
1083 penv->stop = 0;
1084 penv->stopped = 0;
1085 qemu_cpu_kick(penv);
1086 penv = (CPUState *)penv->next_cpu;
1087 }
1088 }
1089
1090 static void qemu_tcg_init_vcpu(void *_env)
1091 {
1092 CPUState *env = _env;
1093
1094 /* share a single thread for all cpus with TCG */
1095 if (!tcg_cpu_thread) {
1096 env->thread = g_malloc0(sizeof(QemuThread));
1097 env->halt_cond = g_malloc0(sizeof(QemuCond));
1098 qemu_cond_init(env->halt_cond);
1099 tcg_halt_cond = env->halt_cond;
1100 qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env);
1101 while (env->created == 0) {
1102 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1103 }
1104 tcg_cpu_thread = env->thread;
1105 } else {
1106 env->thread = tcg_cpu_thread;
1107 env->halt_cond = tcg_halt_cond;
1108 }
1109 }
1110
1111 static void qemu_kvm_start_vcpu(CPUState *env)
1112 {
1113 env->thread = g_malloc0(sizeof(QemuThread));
1114 env->halt_cond = g_malloc0(sizeof(QemuCond));
1115 qemu_cond_init(env->halt_cond);
1116 qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env);
1117 while (env->created == 0) {
1118 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1119 }
1120 }
1121
1122 void qemu_init_vcpu(void *_env)
1123 {
1124 CPUState *env = _env;
1125
1126 env->nr_cores = smp_cores;
1127 env->nr_threads = smp_threads;
1128 env->stopped = 1;
1129 if (kvm_enabled()) {
1130 qemu_kvm_start_vcpu(env);
1131 } else {
1132 qemu_tcg_init_vcpu(env);
1133 }
1134 }
1135
1136 void qemu_notify_event(void)
1137 {
1138 qemu_event_increment();
1139 }
1140
1141 void cpu_stop_current(void)
1142 {
1143 if (cpu_single_env) {
1144 cpu_single_env->stop = 0;
1145 cpu_single_env->stopped = 1;
1146 cpu_exit(cpu_single_env);
1147 qemu_cond_signal(&qemu_pause_cond);
1148 }
1149 }
1150
1151 void vm_stop(RunState state)
1152 {
1153 if (!qemu_thread_is_self(&io_thread)) {
1154 qemu_system_vmstop_request(state);
1155 /*
1156 * FIXME: should not return to device code in case
1157 * vm_stop() has been requested.
1158 */
1159 cpu_stop_current();
1160 return;
1161 }
1162 do_vm_stop(state);
1163 }
1164
1165 /* Do a state transition even if the VM is already stopped; the
1166    current state is forgotten forever. */
1167 void vm_stop_force_state(RunState state)
1168 {
1169 if (runstate_is_running()) {
1170 vm_stop(state);
1171 } else {
1172 runstate_set(state);
1173 }
1174 }
1175
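/* Execute one CPU under TCG.  With -icount, first budget the number of
   instructions that may run before the next vm_clock deadline, and fold any
   unexecuted instructions back into the counter afterwards. */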
1176 static int tcg_cpu_exec(CPUState *env)
1177 {
1178 int ret;
1179 #ifdef CONFIG_PROFILER
1180 int64_t ti;
1181 #endif
1182
1183 #ifdef CONFIG_PROFILER
1184 ti = profile_getclock();
1185 #endif
1186 if (use_icount) {
1187 int64_t count;
1188 int decr;
1189 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
1190 env->icount_decr.u16.low = 0;
1191 env->icount_extra = 0;
1192 count = qemu_icount_round(qemu_clock_deadline(vm_clock));
1193 qemu_icount += count;
1194 decr = (count > 0xffff) ? 0xffff : count;
1195 count -= decr;
1196 env->icount_decr.u16.low = decr;
1197 env->icount_extra = count;
1198 }
1199 ret = cpu_exec(env);
1200 #ifdef CONFIG_PROFILER
1201 qemu_time += profile_getclock() - ti;
1202 #endif
1203 if (use_icount) {
1204 /* Fold pending instructions back into the
1205 instruction counter, and clear the interrupt flag. */
1206 qemu_icount -= (env->icount_decr.u16.low
1207 + env->icount_extra);
1208 env->icount_decr.u32 = 0;
1209 env->icount_extra = 0;
1210 }
1211 return ret;
1212 }
1213
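/* Run each runnable CPU in turn, starting from next_cpu, until an exit is
   requested.  Returns false only when every CPU thread is idle. */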
1214 bool cpu_exec_all(void)
1215 {
1216 int r;
1217
1218 /* Account partial waits to the vm_clock. */
1219 qemu_clock_warp(vm_clock);
1220
1221 if (next_cpu == NULL) {
1222 next_cpu = first_cpu;
1223 }
1224 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1225 CPUState *env = next_cpu;
1226
1227 qemu_clock_enable(vm_clock,
1228 (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1229
1230 if (cpu_can_run(env)) {
1231 if (kvm_enabled()) {
1232 r = kvm_cpu_exec(env);
1233 qemu_kvm_eat_signals(env);
1234 } else {
1235 r = tcg_cpu_exec(env);
1236 }
1237 if (r == EXCP_DEBUG) {
1238 cpu_handle_guest_debug(env);
1239 break;
1240 }
1241 } else if (env->stop || env->stopped) {
1242 break;
1243 }
1244 }
1245 exit_request = 0;
1246 return !all_cpu_threads_idle();
1247 }
1248
1249 void set_numa_modes(void)
1250 {
1251 CPUState *env;
1252 int i;
1253
1254 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1255 for (i = 0; i < nb_numa_nodes; i++) {
1256 if (node_cpumask[i] & (1 << env->cpu_index)) {
1257 env->numa_node = i;
1258 }
1259 }
1260 }
1261 }
1262
1263 void set_cpu_log(const char *optarg)
1264 {
1265 int mask;
1266 const CPULogItem *item;
1267
1268 mask = cpu_str_to_log_mask(optarg);
1269 if (!mask) {
1270 printf("Log items (comma separated):\n");
1271 for (item = cpu_log_items; item->mask != 0; item++) {
1272 printf("%-10s %s\n", item->name, item->help);
1273 }
1274 exit(1);
1275 }
1276 cpu_set_log(mask);
1277 }
1278
1279 void set_cpu_log_filename(const char *optarg)
1280 {
1281 cpu_set_log_filename(optarg);
1282 }
1283
1284 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1285 {
1286 /* XXX: implement xxx_cpu_list for targets that still miss it */
1287 #if defined(cpu_list_id)
1288 cpu_list_id(f, cpu_fprintf, optarg);
1289 #elif defined(cpu_list)
1290 cpu_list(f, cpu_fprintf); /* deprecated */
1291 #endif
1292 }