]> git.proxmox.com Git - qemu.git/blob - cpus.c
51239723fb2393491936c656d04034b0e8e6a0cc
[qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
27
28 #include "monitor/monitor.h"
29 #include "sysemu/sysemu.h"
30 #include "exec/gdbstub.h"
31 #include "sysemu/dma.h"
32 #include "sysemu/kvm.h"
33 #include "qmp-commands.h"
34
35 #include "qemu/thread.h"
36 #include "sysemu/cpus.h"
37 #include "sysemu/qtest.h"
38 #include "qemu/main-loop.h"
39 #include "qemu/bitmap.h"
40
41 #ifndef _WIN32
42 #include "qemu/compatfd.h"
43 #endif
44
45 #ifdef CONFIG_LINUX
46
47 #include <sys/prctl.h>
48
49 #ifndef PR_MCE_KILL
50 #define PR_MCE_KILL 33
51 #endif
52
53 #ifndef PR_MCE_KILL_SET
54 #define PR_MCE_KILL_SET 1
55 #endif
56
57 #ifndef PR_MCE_KILL_EARLY
58 #define PR_MCE_KILL_EARLY 1
59 #endif
60
61 #endif /* CONFIG_LINUX */
62
/* Cursor into the CPU list used by tcg_exec_all(): the walk resumes from
 * this vCPU on the next call and restarts at first_cpu when it is NULL. */
63 static CPUState *next_cpu;
64
65 static bool cpu_thread_is_idle(CPUState *cpu)
66 {
67 if (cpu->stop || cpu->queued_work_first) {
68 return false;
69 }
70 if (cpu->stopped || !runstate_is_running()) {
71 return true;
72 }
73 if (!cpu->halted || qemu_cpu_has_work(cpu) ||
74 kvm_halt_in_kernel()) {
75 return false;
76 }
77 return true;
78 }
79
80 static bool all_cpu_threads_idle(void)
81 {
82 CPUState *cpu;
83
84 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
85 if (!cpu_thread_is_idle(cpu)) {
86 return false;
87 }
88 }
89 return true;
90 }
91
92 /***********************************************************/
93 /* guest cycle counter */
94
95 /* Conversion factor from emulated instructions to virtual clock ticks. */
/* Used as a left-shift count: virtual ns = bias + (instructions << shift). */
96 static int icount_time_shift;
97 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
/* Upper bound enforced by icount_adjust() when it raises icount_time_shift. */
98 #define MAX_ICOUNT_SHIFT 10
99 /* Compensate for varying guest execution speed. */
/* Additive term of the virtual clock; recomputed by icount_adjust() and
 * advanced by the warp functions. */
100 static int64_t qemu_icount_bias;
/* Periodic rt_clock timer that runs icount_adjust() (adaptive mode only). */
101 static QEMUTimer *icount_rt_timer;
/* Periodic vm_clock timer that runs icount_adjust() (adaptive mode only). */
102 static QEMUTimer *icount_vm_timer;
/* rt_clock timer armed by qemu_clock_warp(); fires icount_warp_rt(). */
103 static QEMUTimer *icount_warp_timer;
/* rt_clock timestamp at which the current warp began; -1 means no warp is
 * in progress (see icount_warp_rt()). */
104 static int64_t vm_clock_warp_start;
/* Instruction budget handed out so far; tcg_cpu_exec() adds the budget before
 * running and subtracts what was not executed afterwards. */
105 static int64_t qemu_icount;
106
/* State behind cpu_get_ticks() and cpu_get_clock(). */
107 typedef struct TimersState {
/* Previous raw reading of cpu_get_real_ticks(), kept so cpu_get_ticks() can
 * detect the host counter going backwards. */
108 int64_t cpu_ticks_prev;
/* Added to the raw tick count while ticks are enabled; holds the frozen
 * value returned by cpu_get_ticks() while they are disabled. */
109 int64_t cpu_ticks_offset;
/* Same role as cpu_ticks_offset, for get_clock() / cpu_get_clock(). */
110 int64_t cpu_clock_offset;
/* Non-zero between cpu_enable_ticks() and cpu_disable_ticks(). */
111 int32_t cpu_ticks_enabled;
/* Only referenced by vmstate_timers in this file; presumably a placeholder
 * kept for the saved-state layout -- TODO confirm. */
112 int64_t dummy;
113 } TimersState;
114
/* Single global instance; registered for save/restore in configure_icount(). */
115 TimersState timers_state;
116
117 /* Return the virtual CPU time, based on the instruction counter. */
118 int64_t cpu_get_icount(void)
119 {
120 int64_t icount;
121 CPUState *cpu = current_cpu;
122
123 icount = qemu_icount;
124 if (cpu) {
125 CPUArchState *env = cpu->env_ptr;
126 if (!can_do_io(env)) {
127 fprintf(stderr, "Bad clock read\n");
128 }
129 icount -= (env->icount_decr.u16.low + env->icount_extra);
130 }
131 return qemu_icount_bias + (icount << icount_time_shift);
132 }
133
134 /* return the host CPU cycle counter and handle stop/restart */
135 int64_t cpu_get_ticks(void)
136 {
137 if (use_icount) {
138 return cpu_get_icount();
139 }
140 if (!timers_state.cpu_ticks_enabled) {
141 return timers_state.cpu_ticks_offset;
142 } else {
143 int64_t ticks;
144 ticks = cpu_get_real_ticks();
145 if (timers_state.cpu_ticks_prev > ticks) {
146 /* Note: non increasing ticks may happen if the host uses
147 software suspend */
148 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
149 }
150 timers_state.cpu_ticks_prev = ticks;
151 return ticks + timers_state.cpu_ticks_offset;
152 }
153 }
154
155 /* return the host CPU monotonic timer and handle stop/restart */
156 int64_t cpu_get_clock(void)
157 {
158 int64_t ti;
159 if (!timers_state.cpu_ticks_enabled) {
160 return timers_state.cpu_clock_offset;
161 } else {
162 ti = get_clock();
163 return ti + timers_state.cpu_clock_offset;
164 }
165 }
166
167 /* enable cpu_get_ticks() */
168 void cpu_enable_ticks(void)
169 {
170 if (!timers_state.cpu_ticks_enabled) {
171 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
172 timers_state.cpu_clock_offset -= get_clock();
173 timers_state.cpu_ticks_enabled = 1;
174 }
175 }
176
177 /* disable cpu_get_ticks() : the clock is stopped. You must not call
178 cpu_get_ticks() after that. */
179 void cpu_disable_ticks(void)
180 {
181 if (timers_state.cpu_ticks_enabled) {
182 timers_state.cpu_ticks_offset = cpu_get_ticks();
183 timers_state.cpu_clock_offset = cpu_get_clock();
184 timers_state.cpu_ticks_enabled = 0;
185 }
186 }
187
188 /* Correlation between real and virtual time is always going to be
189 fairly approximate, so ignore small variation.
190 When the guest is idle real and virtual time will be aligned in
191 the IO wait loop. */
192 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
193
194 static void icount_adjust(void)
195 {
196 int64_t cur_time;
197 int64_t cur_icount;
198 int64_t delta;
199 static int64_t last_delta;
200 /* If the VM is not running, then do nothing. */
201 if (!runstate_is_running()) {
202 return;
203 }
204 cur_time = cpu_get_clock();
205 cur_icount = qemu_get_clock_ns(vm_clock);
206 delta = cur_icount - cur_time;
207 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
208 if (delta > 0
209 && last_delta + ICOUNT_WOBBLE < delta * 2
210 && icount_time_shift > 0) {
211 /* The guest is getting too far ahead. Slow time down. */
212 icount_time_shift--;
213 }
214 if (delta < 0
215 && last_delta - ICOUNT_WOBBLE > delta * 2
216 && icount_time_shift < MAX_ICOUNT_SHIFT) {
217 /* The guest is getting too far behind. Speed time up. */
218 icount_time_shift++;
219 }
220 last_delta = delta;
221 qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
222 }
223
224 static void icount_adjust_rt(void *opaque)
225 {
226 qemu_mod_timer(icount_rt_timer,
227 qemu_get_clock_ms(rt_clock) + 1000);
228 icount_adjust();
229 }
230
231 static void icount_adjust_vm(void *opaque)
232 {
233 qemu_mod_timer(icount_vm_timer,
234 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
235 icount_adjust();
236 }
237
238 static int64_t qemu_icount_round(int64_t count)
239 {
240 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
241 }
242
243 static void icount_warp_rt(void *opaque)
244 {
245 if (vm_clock_warp_start == -1) {
246 return;
247 }
248
249 if (runstate_is_running()) {
250 int64_t clock = qemu_get_clock_ns(rt_clock);
251 int64_t warp_delta = clock - vm_clock_warp_start;
252 if (use_icount == 1) {
253 qemu_icount_bias += warp_delta;
254 } else {
255 /*
256 * In adaptive mode, do not let the vm_clock run too
257 * far ahead of real time.
258 */
259 int64_t cur_time = cpu_get_clock();
260 int64_t cur_icount = qemu_get_clock_ns(vm_clock);
261 int64_t delta = cur_time - cur_icount;
262 qemu_icount_bias += MIN(warp_delta, delta);
263 }
264 if (qemu_clock_expired(vm_clock)) {
265 qemu_notify_event();
266 }
267 }
268 vm_clock_warp_start = -1;
269 }
270
271 void qtest_clock_warp(int64_t dest)
272 {
273 int64_t clock = qemu_get_clock_ns(vm_clock);
274 assert(qtest_enabled());
275 while (clock < dest) {
276 int64_t deadline = qemu_clock_deadline(vm_clock);
277 int64_t warp = MIN(dest - clock, deadline);
278 qemu_icount_bias += warp;
279 qemu_run_timers(vm_clock);
280 clock = qemu_get_clock_ns(vm_clock);
281 }
282 qemu_notify_event();
283 }
284
285 void qemu_clock_warp(QEMUClock *clock)
286 {
287 int64_t deadline;
288
289 /*
290 * There are too many global variables to make the "warp" behavior
291 * applicable to other clocks. But a clock argument removes the
292 * need for if statements all over the place.
293 */
294 if (clock != vm_clock || !use_icount) {
295 return;
296 }
297
298 /*
299 * If the CPUs have been sleeping, advance the vm_clock timer now. This
300 * ensures that the deadline for the timer is computed correctly below.
301 * This also makes sure that the insn counter is synchronized before the
302 * CPU starts running, in case the CPU is woken by an event other than
303 * the earliest vm_clock timer.
304 */
305 icount_warp_rt(NULL);
306 if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
307 qemu_del_timer(icount_warp_timer);
308 return;
309 }
310
311 if (qtest_enabled()) {
312 /* When testing, qtest commands advance icount. */
313 return;
314 }
315
316 vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
317 deadline = qemu_clock_deadline(vm_clock);
318 if (deadline > 0) {
319 /*
320 * Ensure the vm_clock proceeds even when the virtual CPU goes to
321 * sleep. Otherwise, the CPU might be waiting for a future timer
322 * interrupt to wake it up, but the interrupt never comes because
323 * the vCPU isn't running any insns and thus doesn't advance the
324 * vm_clock.
325 *
326 * An extreme solution for this problem would be to never let VCPUs
327 * sleep in icount mode if there is a pending vm_clock timer; rather
328 * time could just advance to the next vm_clock event. Instead, we
329 * do stop VCPUs and only advance vm_clock after some "real" time,
330 * (related to the time left until the next event) has passed. This
331 * rt_clock timer will do this. This avoids that the warps are too
332 * visible externally---for example, you will not be sending network
333 * packets continuously instead of every 100ms.
334 */
335 qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
336 } else {
337 qemu_notify_event();
338 }
339 }
340
/* Save/restore description for timers_state, registered under the name
 * "timer" by configure_icount(). */
341 static const VMStateDescription vmstate_timers = {
342 .name = "timer",
343 .version_id = 2,
344 .minimum_version_id = 1,
345 .minimum_version_id_old = 1,
346 .fields = (VMStateField[]) {
347 VMSTATE_INT64(cpu_ticks_offset, TimersState),
348 VMSTATE_INT64(dummy, TimersState),
/* cpu_clock_offset is declared with the versioned macro, tagged version 2. */
349 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
350 VMSTATE_END_OF_LIST()
351 }
352 };
353
354 void configure_icount(const char *option)
355 {
356 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
357 if (!option) {
358 return;
359 }
360
361 icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
362 if (strcmp(option, "auto") != 0) {
363 icount_time_shift = strtol(option, NULL, 0);
364 use_icount = 1;
365 return;
366 }
367
368 use_icount = 2;
369
370 /* 125MIPS seems a reasonable initial guess at the guest speed.
371 It will be corrected fairly quickly anyway. */
372 icount_time_shift = 3;
373
374 /* Have both realtime and virtual time triggers for speed adjustment.
375 The realtime trigger catches emulated time passing too slowly,
376 the virtual time trigger catches emulated time passing too fast.
377 Realtime triggers occur even when idle, so use them less frequently
378 than VM triggers. */
379 icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
380 qemu_mod_timer(icount_rt_timer,
381 qemu_get_clock_ms(rt_clock) + 1000);
382 icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
383 qemu_mod_timer(icount_vm_timer,
384 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
385 }
386
387 /***********************************************************/
388 void hw_error(const char *fmt, ...)
389 {
390 va_list ap;
391 CPUState *cpu;
392
393 va_start(ap, fmt);
394 fprintf(stderr, "qemu: hardware error: ");
395 vfprintf(stderr, fmt, ap);
396 fprintf(stderr, "\n");
397 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
398 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
399 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
400 }
401 va_end(ap);
402 abort();
403 }
404
405 void cpu_synchronize_all_states(void)
406 {
407 CPUState *cpu;
408
409 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
410 cpu_synchronize_state(cpu);
411 }
412 }
413
414 void cpu_synchronize_all_post_reset(void)
415 {
416 CPUState *cpu;
417
418 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
419 cpu_synchronize_post_reset(cpu);
420 }
421 }
422
423 void cpu_synchronize_all_post_init(void)
424 {
425 CPUState *cpu;
426
427 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
428 cpu_synchronize_post_init(cpu);
429 }
430 }
431
432 bool cpu_is_stopped(CPUState *cpu)
433 {
434 return !runstate_is_running() || cpu->stopped;
435 }
436
437 static void do_vm_stop(RunState state)
438 {
439 if (runstate_is_running()) {
440 cpu_disable_ticks();
441 pause_all_vcpus();
442 runstate_set(state);
443 vm_state_notify(0, state);
444 bdrv_drain_all();
445 bdrv_flush_all();
446 monitor_protocol_event(QEVENT_STOP, NULL);
447 }
448 }
449
450 static bool cpu_can_run(CPUState *cpu)
451 {
452 if (cpu->stop) {
453 return false;
454 }
455 if (cpu->stopped || !runstate_is_running()) {
456 return false;
457 }
458 return true;
459 }
460
461 static void cpu_handle_guest_debug(CPUState *cpu)
462 {
463 gdb_set_stop_cpu(cpu);
464 qemu_system_debug_request();
465 cpu->stopped = true;
466 }
467
468 static void cpu_signal(int sig)
469 {
470 if (current_cpu) {
471 cpu_exit(current_cpu);
472 }
473 exit_request = 1;
474 }
475
476 #ifdef CONFIG_LINUX
477 static void sigbus_reraise(void)
478 {
479 sigset_t set;
480 struct sigaction action;
481
482 memset(&action, 0, sizeof(action));
483 action.sa_handler = SIG_DFL;
484 if (!sigaction(SIGBUS, &action, NULL)) {
485 raise(SIGBUS);
486 sigemptyset(&set);
487 sigaddset(&set, SIGBUS);
488 sigprocmask(SIG_UNBLOCK, &set, NULL);
489 }
490 perror("Failed to re-raise SIGBUS!\n");
491 abort();
492 }
493
494 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
495 void *ctx)
496 {
497 if (kvm_on_sigbus(siginfo->ssi_code,
498 (void *)(intptr_t)siginfo->ssi_addr)) {
499 sigbus_reraise();
500 }
501 }
502
503 static void qemu_init_sigbus(void)
504 {
505 struct sigaction action;
506
507 memset(&action, 0, sizeof(action));
508 action.sa_flags = SA_SIGINFO;
509 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
510 sigaction(SIGBUS, &action, NULL);
511
512 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
513 }
514
515 static void qemu_kvm_eat_signals(CPUState *cpu)
516 {
517 struct timespec ts = { 0, 0 };
518 siginfo_t siginfo;
519 sigset_t waitset;
520 sigset_t chkset;
521 int r;
522
523 sigemptyset(&waitset);
524 sigaddset(&waitset, SIG_IPI);
525 sigaddset(&waitset, SIGBUS);
526
527 do {
528 r = sigtimedwait(&waitset, &siginfo, &ts);
529 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
530 perror("sigtimedwait");
531 exit(1);
532 }
533
534 switch (r) {
535 case SIGBUS:
536 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
537 sigbus_reraise();
538 }
539 break;
540 default:
541 break;
542 }
543
544 r = sigpending(&chkset);
545 if (r == -1) {
546 perror("sigpending");
547 exit(1);
548 }
549 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
550 }
551
552 #else /* !CONFIG_LINUX */
553
/* Non-Linux build: there is no SIGBUS setup to perform. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
557
558 static void qemu_kvm_eat_signals(CPUState *cpu)
559 {
560 }
561 #endif /* !CONFIG_LINUX */
562
563 #ifndef _WIN32
/* Signal handler that does nothing; installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals().  @sig is ignored. */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
567
568 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
569 {
570 int r;
571 sigset_t set;
572 struct sigaction sigact;
573
574 memset(&sigact, 0, sizeof(sigact));
575 sigact.sa_handler = dummy_signal;
576 sigaction(SIG_IPI, &sigact, NULL);
577
578 pthread_sigmask(SIG_BLOCK, NULL, &set);
579 sigdelset(&set, SIG_IPI);
580 sigdelset(&set, SIGBUS);
581 r = kvm_set_signal_mask(cpu, &set);
582 if (r) {
583 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
584 exit(1);
585 }
586 }
587
588 static void qemu_tcg_init_cpu_signals(void)
589 {
590 sigset_t set;
591 struct sigaction sigact;
592
593 memset(&sigact, 0, sizeof(sigact));
594 sigact.sa_handler = cpu_signal;
595 sigaction(SIG_IPI, &sigact, NULL);
596
597 sigemptyset(&set);
598 sigaddset(&set, SIG_IPI);
599 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
600 }
601
602 #else /* _WIN32 */
603 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
604 {
605 abort();
606 }
607
/* Windows build: no signal setup is performed for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* intentionally empty */
}
611 #endif /* _WIN32 */
612
/* Global lock taken by qemu_mutex_lock_iothread() and by the vCPU thread
 * functions in this file; also the mutex paired with every condition
 * variable below. */
613 static QemuMutex qemu_global_mutex;
/* Broadcast by qemu_mutex_lock_iothread() once it holds the lock under TCG;
 * waited on in qemu_tcg_wait_io_event(). */
614 static QemuCond qemu_io_proceeded_cond;
/* True while qemu_mutex_lock_iothread() is acquiring the lock under TCG. */
615 static bool iothread_requesting_mutex;
616
/* Identity of the thread that called qemu_init_cpu_loop(). */
617 static QemuThread io_thread;
618
/* Thread and halt condition shared by all vCPUs under TCG; set up by
 * qemu_tcg_init_vcpu() for the first vCPU and reused for the rest. */
619 static QemuThread *tcg_cpu_thread;
620 static QemuCond *tcg_halt_cond;
621
622 /* cpu creation */
/* Signalled by a vCPU thread after it sets cpu->created. */
623 static QemuCond qemu_cpu_cond;
624 /* system init */
/* Signalled when a vCPU moves to the stopped state; pause_all_vcpus()
 * waits on it. */
625 static QemuCond qemu_pause_cond;
/* Broadcast by flush_queued_work(); run_on_cpu() waits on it. */
626 static QemuCond qemu_work_cond;
627
628 void qemu_init_cpu_loop(void)
629 {
630 qemu_init_sigbus();
631 qemu_cond_init(&qemu_cpu_cond);
632 qemu_cond_init(&qemu_pause_cond);
633 qemu_cond_init(&qemu_work_cond);
634 qemu_cond_init(&qemu_io_proceeded_cond);
635 qemu_mutex_init(&qemu_global_mutex);
636
637 qemu_thread_get_self(&io_thread);
638 }
639
640 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
641 {
642 struct qemu_work_item wi;
643
644 if (qemu_cpu_is_self(cpu)) {
645 func(data);
646 return;
647 }
648
649 wi.func = func;
650 wi.data = data;
651 wi.free = false;
652 if (cpu->queued_work_first == NULL) {
653 cpu->queued_work_first = &wi;
654 } else {
655 cpu->queued_work_last->next = &wi;
656 }
657 cpu->queued_work_last = &wi;
658 wi.next = NULL;
659 wi.done = false;
660
661 qemu_cpu_kick(cpu);
662 while (!wi.done) {
663 CPUState *self_cpu = current_cpu;
664
665 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
666 current_cpu = self_cpu;
667 }
668 }
669
670 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
671 {
672 struct qemu_work_item *wi;
673
674 if (qemu_cpu_is_self(cpu)) {
675 func(data);
676 return;
677 }
678
679 wi = g_malloc0(sizeof(struct qemu_work_item));
680 wi->func = func;
681 wi->data = data;
682 wi->free = true;
683 if (cpu->queued_work_first == NULL) {
684 cpu->queued_work_first = wi;
685 } else {
686 cpu->queued_work_last->next = wi;
687 }
688 cpu->queued_work_last = wi;
689 wi->next = NULL;
690 wi->done = false;
691
692 qemu_cpu_kick(cpu);
693 }
694
695 static void flush_queued_work(CPUState *cpu)
696 {
697 struct qemu_work_item *wi;
698
699 if (cpu->queued_work_first == NULL) {
700 return;
701 }
702
703 while ((wi = cpu->queued_work_first)) {
704 cpu->queued_work_first = wi->next;
705 wi->func(wi->data);
706 wi->done = true;
707 if (wi->free) {
708 g_free(wi);
709 }
710 }
711 cpu->queued_work_last = NULL;
712 qemu_cond_broadcast(&qemu_work_cond);
713 }
714
715 static void qemu_wait_io_event_common(CPUState *cpu)
716 {
717 if (cpu->stop) {
718 cpu->stop = false;
719 cpu->stopped = true;
720 qemu_cond_signal(&qemu_pause_cond);
721 }
722 flush_queued_work(cpu);
723 cpu->thread_kicked = false;
724 }
725
726 static void qemu_tcg_wait_io_event(void)
727 {
728 CPUState *cpu;
729
730 while (all_cpu_threads_idle()) {
731 /* Start accounting real time to the virtual clock if the CPUs
732 are idle. */
733 qemu_clock_warp(vm_clock);
734 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
735 }
736
737 while (iothread_requesting_mutex) {
738 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
739 }
740
741 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
742 qemu_wait_io_event_common(cpu);
743 }
744 }
745
746 static void qemu_kvm_wait_io_event(CPUState *cpu)
747 {
748 while (cpu_thread_is_idle(cpu)) {
749 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
750 }
751
752 qemu_kvm_eat_signals(cpu);
753 qemu_wait_io_event_common(cpu);
754 }
755
756 static void *qemu_kvm_cpu_thread_fn(void *arg)
757 {
758 CPUState *cpu = arg;
759 int r;
760
761 qemu_mutex_lock(&qemu_global_mutex);
762 qemu_thread_get_self(cpu->thread);
763 cpu->thread_id = qemu_get_thread_id();
764 current_cpu = cpu;
765
766 r = kvm_init_vcpu(cpu);
767 if (r < 0) {
768 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
769 exit(1);
770 }
771
772 qemu_kvm_init_cpu_signals(cpu);
773
774 /* signal CPU creation */
775 cpu->created = true;
776 qemu_cond_signal(&qemu_cpu_cond);
777
778 while (1) {
779 if (cpu_can_run(cpu)) {
780 r = kvm_cpu_exec(cpu);
781 if (r == EXCP_DEBUG) {
782 cpu_handle_guest_debug(cpu);
783 }
784 }
785 qemu_kvm_wait_io_event(cpu);
786 }
787
788 return NULL;
789 }
790
791 static void *qemu_dummy_cpu_thread_fn(void *arg)
792 {
793 #ifdef _WIN32
794 fprintf(stderr, "qtest is not supported under Windows\n");
795 exit(1);
796 #else
797 CPUState *cpu = arg;
798 sigset_t waitset;
799 int r;
800
801 qemu_mutex_lock_iothread();
802 qemu_thread_get_self(cpu->thread);
803 cpu->thread_id = qemu_get_thread_id();
804
805 sigemptyset(&waitset);
806 sigaddset(&waitset, SIG_IPI);
807
808 /* signal CPU creation */
809 cpu->created = true;
810 qemu_cond_signal(&qemu_cpu_cond);
811
812 current_cpu = cpu;
813 while (1) {
814 current_cpu = NULL;
815 qemu_mutex_unlock_iothread();
816 do {
817 int sig;
818 r = sigwait(&waitset, &sig);
819 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
820 if (r == -1) {
821 perror("sigwait");
822 exit(1);
823 }
824 qemu_mutex_lock_iothread();
825 current_cpu = cpu;
826 qemu_wait_io_event_common(cpu);
827 }
828
829 return NULL;
830 #endif
831 }
832
833 static void tcg_exec_all(void);
834
835 static void tcg_signal_cpu_creation(CPUState *cpu, void *data)
836 {
837 cpu->thread_id = qemu_get_thread_id();
838 cpu->created = true;
839 }
840
841 static void *qemu_tcg_cpu_thread_fn(void *arg)
842 {
843 CPUState *cpu = arg;
844
845 qemu_tcg_init_cpu_signals();
846 qemu_thread_get_self(cpu->thread);
847
848 qemu_mutex_lock(&qemu_global_mutex);
849 qemu_for_each_cpu(tcg_signal_cpu_creation, NULL);
850 qemu_cond_signal(&qemu_cpu_cond);
851
852 /* wait for initial kick-off after machine start */
853 while (first_cpu->stopped) {
854 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
855
856 /* process any pending work */
857 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
858 qemu_wait_io_event_common(cpu);
859 }
860 }
861
862 while (1) {
863 tcg_exec_all();
864 if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
865 qemu_notify_event();
866 }
867 qemu_tcg_wait_io_event();
868 }
869
870 return NULL;
871 }
872
873 static void qemu_cpu_kick_thread(CPUState *cpu)
874 {
875 #ifndef _WIN32
876 int err;
877
878 err = pthread_kill(cpu->thread->thread, SIG_IPI);
879 if (err) {
880 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
881 exit(1);
882 }
883 #else /* _WIN32 */
884 if (!qemu_cpu_is_self(cpu)) {
885 CONTEXT tcgContext;
886
887 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
888 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
889 GetLastError());
890 exit(1);
891 }
892
893 /* On multi-core systems, we are not sure that the thread is actually
894 * suspended until we can get the context.
895 */
896 tcgContext.ContextFlags = CONTEXT_CONTROL;
897 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
898 continue;
899 }
900
901 cpu_signal(0);
902
903 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
904 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
905 GetLastError());
906 exit(1);
907 }
908 }
909 #endif
910 }
911
912 void qemu_cpu_kick(CPUState *cpu)
913 {
914 qemu_cond_broadcast(cpu->halt_cond);
915 if (!tcg_enabled() && !cpu->thread_kicked) {
916 qemu_cpu_kick_thread(cpu);
917 cpu->thread_kicked = true;
918 }
919 }
920
921 void qemu_cpu_kick_self(void)
922 {
923 #ifndef _WIN32
924 assert(current_cpu);
925
926 if (!current_cpu->thread_kicked) {
927 qemu_cpu_kick_thread(current_cpu);
928 current_cpu->thread_kicked = true;
929 }
930 #else
931 abort();
932 #endif
933 }
934
935 bool qemu_cpu_is_self(CPUState *cpu)
936 {
937 return qemu_thread_is_self(cpu->thread);
938 }
939
940 static bool qemu_in_vcpu_thread(void)
941 {
942 return current_cpu && qemu_cpu_is_self(current_cpu);
943 }
944
945 void qemu_mutex_lock_iothread(void)
946 {
947 if (!tcg_enabled()) {
948 qemu_mutex_lock(&qemu_global_mutex);
949 } else {
950 iothread_requesting_mutex = true;
951 if (qemu_mutex_trylock(&qemu_global_mutex)) {
952 qemu_cpu_kick_thread(first_cpu);
953 qemu_mutex_lock(&qemu_global_mutex);
954 }
955 iothread_requesting_mutex = false;
956 qemu_cond_broadcast(&qemu_io_proceeded_cond);
957 }
958 }
959
960 void qemu_mutex_unlock_iothread(void)
961 {
962 qemu_mutex_unlock(&qemu_global_mutex);
963 }
964
965 static int all_vcpus_paused(void)
966 {
967 CPUState *cpu = first_cpu;
968
969 while (cpu) {
970 if (!cpu->stopped) {
971 return 0;
972 }
973 cpu = cpu->next_cpu;
974 }
975
976 return 1;
977 }
978
979 void pause_all_vcpus(void)
980 {
981 CPUState *cpu = first_cpu;
982
983 qemu_clock_enable(vm_clock, false);
984 while (cpu) {
985 cpu->stop = true;
986 qemu_cpu_kick(cpu);
987 cpu = cpu->next_cpu;
988 }
989
990 if (qemu_in_vcpu_thread()) {
991 cpu_stop_current();
992 if (!kvm_enabled()) {
993 cpu = first_cpu;
994 while (cpu) {
995 cpu->stop = false;
996 cpu->stopped = true;
997 cpu = cpu->next_cpu;
998 }
999 return;
1000 }
1001 }
1002
1003 while (!all_vcpus_paused()) {
1004 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1005 cpu = first_cpu;
1006 while (cpu) {
1007 qemu_cpu_kick(cpu);
1008 cpu = cpu->next_cpu;
1009 }
1010 }
1011 }
1012
1013 void cpu_resume(CPUState *cpu)
1014 {
1015 cpu->stop = false;
1016 cpu->stopped = false;
1017 qemu_cpu_kick(cpu);
1018 }
1019
1020 void resume_all_vcpus(void)
1021 {
1022 CPUState *cpu = first_cpu;
1023
1024 qemu_clock_enable(vm_clock, true);
1025 while (cpu) {
1026 cpu_resume(cpu);
1027 cpu = cpu->next_cpu;
1028 }
1029 }
1030
1031 static void qemu_tcg_init_vcpu(CPUState *cpu)
1032 {
1033 /* share a single thread for all cpus with TCG */
1034 if (!tcg_cpu_thread) {
1035 cpu->thread = g_malloc0(sizeof(QemuThread));
1036 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1037 qemu_cond_init(cpu->halt_cond);
1038 tcg_halt_cond = cpu->halt_cond;
1039 qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
1040 QEMU_THREAD_JOINABLE);
1041 #ifdef _WIN32
1042 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1043 #endif
1044 while (!cpu->created) {
1045 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1046 }
1047 tcg_cpu_thread = cpu->thread;
1048 } else {
1049 cpu->thread = tcg_cpu_thread;
1050 cpu->halt_cond = tcg_halt_cond;
1051 }
1052 }
1053
1054 static void qemu_kvm_start_vcpu(CPUState *cpu)
1055 {
1056 cpu->thread = g_malloc0(sizeof(QemuThread));
1057 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1058 qemu_cond_init(cpu->halt_cond);
1059 qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
1060 QEMU_THREAD_JOINABLE);
1061 while (!cpu->created) {
1062 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1063 }
1064 }
1065
1066 static void qemu_dummy_start_vcpu(CPUState *cpu)
1067 {
1068 cpu->thread = g_malloc0(sizeof(QemuThread));
1069 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1070 qemu_cond_init(cpu->halt_cond);
1071 qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, cpu,
1072 QEMU_THREAD_JOINABLE);
1073 while (!cpu->created) {
1074 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1075 }
1076 }
1077
1078 void qemu_init_vcpu(CPUState *cpu)
1079 {
1080 cpu->nr_cores = smp_cores;
1081 cpu->nr_threads = smp_threads;
1082 cpu->stopped = true;
1083 if (kvm_enabled()) {
1084 qemu_kvm_start_vcpu(cpu);
1085 } else if (tcg_enabled()) {
1086 qemu_tcg_init_vcpu(cpu);
1087 } else {
1088 qemu_dummy_start_vcpu(cpu);
1089 }
1090 }
1091
1092 void cpu_stop_current(void)
1093 {
1094 if (current_cpu) {
1095 current_cpu->stop = false;
1096 current_cpu->stopped = true;
1097 cpu_exit(current_cpu);
1098 qemu_cond_signal(&qemu_pause_cond);
1099 }
1100 }
1101
1102 void vm_stop(RunState state)
1103 {
1104 if (qemu_in_vcpu_thread()) {
1105 qemu_system_vmstop_request(state);
1106 /*
1107 * FIXME: should not return to device code in case
1108 * vm_stop() has been requested.
1109 */
1110 cpu_stop_current();
1111 return;
1112 }
1113 do_vm_stop(state);
1114 }
1115
1116 /* does a state transition even if the VM is already stopped,
1117 current state is forgotten forever */
1118 void vm_stop_force_state(RunState state)
1119 {
1120 if (runstate_is_running()) {
1121 vm_stop(state);
1122 } else {
1123 runstate_set(state);
1124 }
1125 }
1126
1127 static int tcg_cpu_exec(CPUArchState *env)
1128 {
1129 int ret;
1130 #ifdef CONFIG_PROFILER
1131 int64_t ti;
1132 #endif
1133
1134 #ifdef CONFIG_PROFILER
1135 ti = profile_getclock();
1136 #endif
1137 if (use_icount) {
1138 int64_t count;
1139 int decr;
1140 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
1141 env->icount_decr.u16.low = 0;
1142 env->icount_extra = 0;
1143 count = qemu_icount_round(qemu_clock_deadline(vm_clock));
1144 qemu_icount += count;
1145 decr = (count > 0xffff) ? 0xffff : count;
1146 count -= decr;
1147 env->icount_decr.u16.low = decr;
1148 env->icount_extra = count;
1149 }
1150 ret = cpu_exec(env);
1151 #ifdef CONFIG_PROFILER
1152 qemu_time += profile_getclock() - ti;
1153 #endif
1154 if (use_icount) {
1155 /* Fold pending instructions back into the
1156 instruction counter, and clear the interrupt flag. */
1157 qemu_icount -= (env->icount_decr.u16.low
1158 + env->icount_extra);
1159 env->icount_decr.u32 = 0;
1160 env->icount_extra = 0;
1161 }
1162 return ret;
1163 }
1164
1165 static void tcg_exec_all(void)
1166 {
1167 int r;
1168
1169 /* Account partial waits to the vm_clock. */
1170 qemu_clock_warp(vm_clock);
1171
1172 if (next_cpu == NULL) {
1173 next_cpu = first_cpu;
1174 }
1175 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1176 CPUState *cpu = next_cpu;
1177 CPUArchState *env = cpu->env_ptr;
1178
1179 qemu_clock_enable(vm_clock,
1180 (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1181
1182 if (cpu_can_run(cpu)) {
1183 r = tcg_cpu_exec(env);
1184 if (r == EXCP_DEBUG) {
1185 cpu_handle_guest_debug(cpu);
1186 break;
1187 }
1188 } else if (cpu->stop || cpu->stopped) {
1189 break;
1190 }
1191 }
1192 exit_request = 0;
1193 }
1194
1195 void set_numa_modes(void)
1196 {
1197 CPUState *cpu;
1198 int i;
1199
1200 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
1201 for (i = 0; i < nb_numa_nodes; i++) {
1202 if (test_bit(cpu->cpu_index, node_cpumask[i])) {
1203 cpu->numa_node = i;
1204 }
1205 }
1206 }
1207 }
1208
1209 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1210 {
1211 /* XXX: implement xxx_cpu_list for targets that still miss it */
1212 #if defined(cpu_list)
1213 cpu_list(f, cpu_fprintf);
1214 #endif
1215 }
1216
1217 CpuInfoList *qmp_query_cpus(Error **errp)
1218 {
1219 CpuInfoList *head = NULL, *cur_item = NULL;
1220 CPUState *cpu;
1221
1222 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
1223 CpuInfoList *info;
1224 #if defined(TARGET_I386)
1225 X86CPU *x86_cpu = X86_CPU(cpu);
1226 CPUX86State *env = &x86_cpu->env;
1227 #elif defined(TARGET_PPC)
1228 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1229 CPUPPCState *env = &ppc_cpu->env;
1230 #elif defined(TARGET_SPARC)
1231 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1232 CPUSPARCState *env = &sparc_cpu->env;
1233 #elif defined(TARGET_MIPS)
1234 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1235 CPUMIPSState *env = &mips_cpu->env;
1236 #endif
1237
1238 cpu_synchronize_state(cpu);
1239
1240 info = g_malloc0(sizeof(*info));
1241 info->value = g_malloc0(sizeof(*info->value));
1242 info->value->CPU = cpu->cpu_index;
1243 info->value->current = (cpu == first_cpu);
1244 info->value->halted = cpu->halted;
1245 info->value->thread_id = cpu->thread_id;
1246 #if defined(TARGET_I386)
1247 info->value->has_pc = true;
1248 info->value->pc = env->eip + env->segs[R_CS].base;
1249 #elif defined(TARGET_PPC)
1250 info->value->has_nip = true;
1251 info->value->nip = env->nip;
1252 #elif defined(TARGET_SPARC)
1253 info->value->has_pc = true;
1254 info->value->pc = env->pc;
1255 info->value->has_npc = true;
1256 info->value->npc = env->npc;
1257 #elif defined(TARGET_MIPS)
1258 info->value->has_PC = true;
1259 info->value->PC = env->active_tc.PC;
1260 #endif
1261
1262 /* XXX: waiting for the qapi to support GSList */
1263 if (!cur_item) {
1264 head = cur_item = info;
1265 } else {
1266 cur_item->next = info;
1267 cur_item = info;
1268 }
1269 }
1270
1271 return head;
1272 }
1273
1274 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1275 bool has_cpu, int64_t cpu_index, Error **errp)
1276 {
1277 FILE *f;
1278 uint32_t l;
1279 CPUArchState *env;
1280 CPUState *cpu;
1281 uint8_t buf[1024];
1282
1283 if (!has_cpu) {
1284 cpu_index = 0;
1285 }
1286
1287 cpu = qemu_get_cpu(cpu_index);
1288 if (cpu == NULL) {
1289 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1290 "a CPU number");
1291 return;
1292 }
1293 env = cpu->env_ptr;
1294
1295 f = fopen(filename, "wb");
1296 if (!f) {
1297 error_setg_file_open(errp, errno, filename);
1298 return;
1299 }
1300
1301 while (size != 0) {
1302 l = sizeof(buf);
1303 if (l > size)
1304 l = size;
1305 cpu_memory_rw_debug(env, addr, buf, l, 0);
1306 if (fwrite(buf, 1, l, f) != l) {
1307 error_set(errp, QERR_IO_ERROR);
1308 goto exit;
1309 }
1310 addr += l;
1311 size -= l;
1312 }
1313
1314 exit:
1315 fclose(f);
1316 }
1317
1318 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1319 Error **errp)
1320 {
1321 FILE *f;
1322 uint32_t l;
1323 uint8_t buf[1024];
1324
1325 f = fopen(filename, "wb");
1326 if (!f) {
1327 error_setg_file_open(errp, errno, filename);
1328 return;
1329 }
1330
1331 while (size != 0) {
1332 l = sizeof(buf);
1333 if (l > size)
1334 l = size;
1335 cpu_physical_memory_rw(addr, buf, l, 0);
1336 if (fwrite(buf, 1, l, f) != l) {
1337 error_set(errp, QERR_IO_ERROR);
1338 goto exit;
1339 }
1340 addr += l;
1341 size -= l;
1342 }
1343
1344 exit:
1345 fclose(f);
1346 }
1347
1348 void qmp_inject_nmi(Error **errp)
1349 {
1350 #if defined(TARGET_I386)
1351 CPUState *cs;
1352
1353 for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) {
1354 X86CPU *cpu = X86_CPU(cs);
1355 CPUX86State *env = &cpu->env;
1356
1357 if (!env->apic_state) {
1358 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1359 } else {
1360 apic_deliver_nmi(env->apic_state);
1361 }
1362 }
1363 #else
1364 error_set(errp, QERR_UNSUPPORTED);
1365 #endif
1366 }