]> git.proxmox.com Git - qemu.git/blob - cpus.c
Merge remote-tracking branch 'stefanha/block' into staging
[qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
27
28 #include "monitor/monitor.h"
29 #include "sysemu/sysemu.h"
30 #include "exec/gdbstub.h"
31 #include "sysemu/dma.h"
32 #include "sysemu/kvm.h"
33 #include "qmp-commands.h"
34
35 #include "qemu/thread.h"
36 #include "sysemu/cpus.h"
37 #include "sysemu/qtest.h"
38 #include "qemu/main-loop.h"
39 #include "qemu/bitmap.h"
40
41 #ifndef _WIN32
42 #include "qemu/compatfd.h"
43 #endif
44
45 #ifdef CONFIG_LINUX
46
47 #include <sys/prctl.h>
48
49 #ifndef PR_MCE_KILL
50 #define PR_MCE_KILL 33
51 #endif
52
53 #ifndef PR_MCE_KILL_SET
54 #define PR_MCE_KILL_SET 1
55 #endif
56
57 #ifndef PR_MCE_KILL_EARLY
58 #define PR_MCE_KILL_EARLY 1
59 #endif
60
61 #endif /* CONFIG_LINUX */
62
63 static CPUState *next_cpu;
64
65 static bool cpu_thread_is_idle(CPUState *cpu)
66 {
67 if (cpu->stop || cpu->queued_work_first) {
68 return false;
69 }
70 if (cpu->stopped || !runstate_is_running()) {
71 return true;
72 }
73 if (!cpu->halted || qemu_cpu_has_work(cpu) ||
74 kvm_halt_in_kernel()) {
75 return false;
76 }
77 return true;
78 }
79
80 static bool all_cpu_threads_idle(void)
81 {
82 CPUState *cpu;
83
84 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
85 if (!cpu_thread_is_idle(cpu)) {
86 return false;
87 }
88 }
89 return true;
90 }
91
92 /***********************************************************/
93 /* guest cycle counter */
94
95 /* Conversion factor from emulated instructions to virtual clock ticks. */
96 static int icount_time_shift;
97 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
98 #define MAX_ICOUNT_SHIFT 10
99 /* Compensate for varying guest execution speed. */
100 static int64_t qemu_icount_bias;
101 static QEMUTimer *icount_rt_timer;
102 static QEMUTimer *icount_vm_timer;
103 static QEMUTimer *icount_warp_timer;
104 static int64_t vm_clock_warp_start;
105 static int64_t qemu_icount;
106
/* Book-keeping for the host tick counter and the monotonic CPU clock. */
typedef struct TimersState TimersState;

struct TimersState {
    int64_t cpu_ticks_prev;     /* last raw tick value seen by cpu_get_ticks() */
    int64_t cpu_ticks_offset;   /* offset applied to raw ticks (or frozen value) */
    int64_t cpu_clock_offset;   /* offset applied to get_clock() (or frozen value) */
    int32_t cpu_ticks_enabled;  /* non-zero while the clocks are running */
    int64_t dummy;              /* only referenced by the vmstate description */
};

static TimersState timers_state;
116
117 /* Return the virtual CPU time, based on the instruction counter. */
118 int64_t cpu_get_icount(void)
119 {
120 int64_t icount;
121 CPUState *cpu = current_cpu;
122
123 icount = qemu_icount;
124 if (cpu) {
125 CPUArchState *env = cpu->env_ptr;
126 if (!can_do_io(env)) {
127 fprintf(stderr, "Bad clock read\n");
128 }
129 icount -= (env->icount_decr.u16.low + env->icount_extra);
130 }
131 return qemu_icount_bias + (icount << icount_time_shift);
132 }
133
134 /* return the host CPU cycle counter and handle stop/restart */
135 int64_t cpu_get_ticks(void)
136 {
137 if (use_icount) {
138 return cpu_get_icount();
139 }
140 if (!timers_state.cpu_ticks_enabled) {
141 return timers_state.cpu_ticks_offset;
142 } else {
143 int64_t ticks;
144 ticks = cpu_get_real_ticks();
145 if (timers_state.cpu_ticks_prev > ticks) {
146 /* Note: non increasing ticks may happen if the host uses
147 software suspend */
148 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
149 }
150 timers_state.cpu_ticks_prev = ticks;
151 return ticks + timers_state.cpu_ticks_offset;
152 }
153 }
154
155 /* return the host CPU monotonic timer and handle stop/restart */
156 int64_t cpu_get_clock(void)
157 {
158 int64_t ti;
159 if (!timers_state.cpu_ticks_enabled) {
160 return timers_state.cpu_clock_offset;
161 } else {
162 ti = get_clock();
163 return ti + timers_state.cpu_clock_offset;
164 }
165 }
166
167 /* enable cpu_get_ticks() */
168 void cpu_enable_ticks(void)
169 {
170 if (!timers_state.cpu_ticks_enabled) {
171 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
172 timers_state.cpu_clock_offset -= get_clock();
173 timers_state.cpu_ticks_enabled = 1;
174 }
175 }
176
177 /* disable cpu_get_ticks() : the clock is stopped. You must not call
178 cpu_get_ticks() after that. */
179 void cpu_disable_ticks(void)
180 {
181 if (timers_state.cpu_ticks_enabled) {
182 timers_state.cpu_ticks_offset = cpu_get_ticks();
183 timers_state.cpu_clock_offset = cpu_get_clock();
184 timers_state.cpu_ticks_enabled = 0;
185 }
186 }
187
188 /* Correlation between real and virtual time is always going to be
189 fairly approximate, so ignore small variation.
190 When the guest is idle real and virtual time will be aligned in
191 the IO wait loop. */
192 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
193
194 static void icount_adjust(void)
195 {
196 int64_t cur_time;
197 int64_t cur_icount;
198 int64_t delta;
199 static int64_t last_delta;
200 /* If the VM is not running, then do nothing. */
201 if (!runstate_is_running()) {
202 return;
203 }
204 cur_time = cpu_get_clock();
205 cur_icount = qemu_get_clock_ns(vm_clock);
206 delta = cur_icount - cur_time;
207 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
208 if (delta > 0
209 && last_delta + ICOUNT_WOBBLE < delta * 2
210 && icount_time_shift > 0) {
211 /* The guest is getting too far ahead. Slow time down. */
212 icount_time_shift--;
213 }
214 if (delta < 0
215 && last_delta - ICOUNT_WOBBLE > delta * 2
216 && icount_time_shift < MAX_ICOUNT_SHIFT) {
217 /* The guest is getting too far behind. Speed time up. */
218 icount_time_shift++;
219 }
220 last_delta = delta;
221 qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
222 }
223
224 static void icount_adjust_rt(void *opaque)
225 {
226 qemu_mod_timer(icount_rt_timer,
227 qemu_get_clock_ms(rt_clock) + 1000);
228 icount_adjust();
229 }
230
231 static void icount_adjust_vm(void *opaque)
232 {
233 qemu_mod_timer(icount_vm_timer,
234 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
235 icount_adjust();
236 }
237
238 static int64_t qemu_icount_round(int64_t count)
239 {
240 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
241 }
242
243 static void icount_warp_rt(void *opaque)
244 {
245 if (vm_clock_warp_start == -1) {
246 return;
247 }
248
249 if (runstate_is_running()) {
250 int64_t clock = qemu_get_clock_ns(rt_clock);
251 int64_t warp_delta = clock - vm_clock_warp_start;
252 if (use_icount == 1) {
253 qemu_icount_bias += warp_delta;
254 } else {
255 /*
256 * In adaptive mode, do not let the vm_clock run too
257 * far ahead of real time.
258 */
259 int64_t cur_time = cpu_get_clock();
260 int64_t cur_icount = qemu_get_clock_ns(vm_clock);
261 int64_t delta = cur_time - cur_icount;
262 qemu_icount_bias += MIN(warp_delta, delta);
263 }
264 if (qemu_clock_expired(vm_clock)) {
265 qemu_notify_event();
266 }
267 }
268 vm_clock_warp_start = -1;
269 }
270
271 void qtest_clock_warp(int64_t dest)
272 {
273 int64_t clock = qemu_get_clock_ns(vm_clock);
274 assert(qtest_enabled());
275 while (clock < dest) {
276 int64_t deadline = qemu_clock_deadline(vm_clock);
277 int64_t warp = MIN(dest - clock, deadline);
278 qemu_icount_bias += warp;
279 qemu_run_timers(vm_clock);
280 clock = qemu_get_clock_ns(vm_clock);
281 }
282 qemu_notify_event();
283 }
284
285 void qemu_clock_warp(QEMUClock *clock)
286 {
287 int64_t deadline;
288
289 /*
290 * There are too many global variables to make the "warp" behavior
291 * applicable to other clocks. But a clock argument removes the
292 * need for if statements all over the place.
293 */
294 if (clock != vm_clock || !use_icount) {
295 return;
296 }
297
298 /*
299 * If the CPUs have been sleeping, advance the vm_clock timer now. This
300 * ensures that the deadline for the timer is computed correctly below.
301 * This also makes sure that the insn counter is synchronized before the
302 * CPU starts running, in case the CPU is woken by an event other than
303 * the earliest vm_clock timer.
304 */
305 icount_warp_rt(NULL);
306 if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
307 qemu_del_timer(icount_warp_timer);
308 return;
309 }
310
311 if (qtest_enabled()) {
312 /* When testing, qtest commands advance icount. */
313 return;
314 }
315
316 vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
317 deadline = qemu_clock_deadline(vm_clock);
318 if (deadline > 0) {
319 /*
320 * Ensure the vm_clock proceeds even when the virtual CPU goes to
321 * sleep. Otherwise, the CPU might be waiting for a future timer
322 * interrupt to wake it up, but the interrupt never comes because
323 * the vCPU isn't running any insns and thus doesn't advance the
324 * vm_clock.
325 *
326 * An extreme solution for this problem would be to never let VCPUs
327 * sleep in icount mode if there is a pending vm_clock timer; rather
328 * time could just advance to the next vm_clock event. Instead, we
329 * do stop VCPUs and only advance vm_clock after some "real" time,
330 * (related to the time left until the next event) has passed. This
331 * rt_clock timer will do this. This avoids that the warps are too
332 * visible externally---for example, you will not be sending network
333 * packets continuously instead of every 100ms.
334 */
335 qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
336 } else {
337 qemu_notify_event();
338 }
339 }
340
341 static const VMStateDescription vmstate_timers = {
342 .name = "timer",
343 .version_id = 2,
344 .minimum_version_id = 1,
345 .minimum_version_id_old = 1,
346 .fields = (VMStateField[]) {
347 VMSTATE_INT64(cpu_ticks_offset, TimersState),
348 VMSTATE_INT64(dummy, TimersState),
349 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
350 VMSTATE_END_OF_LIST()
351 }
352 };
353
354 void configure_icount(const char *option)
355 {
356 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
357 if (!option) {
358 return;
359 }
360
361 icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
362 if (strcmp(option, "auto") != 0) {
363 icount_time_shift = strtol(option, NULL, 0);
364 use_icount = 1;
365 return;
366 }
367
368 use_icount = 2;
369
370 /* 125MIPS seems a reasonable initial guess at the guest speed.
371 It will be corrected fairly quickly anyway. */
372 icount_time_shift = 3;
373
374 /* Have both realtime and virtual time triggers for speed adjustment.
375 The realtime trigger catches emulated time passing too slowly,
376 the virtual time trigger catches emulated time passing too fast.
377 Realtime triggers occur even when idle, so use them less frequently
378 than VM triggers. */
379 icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
380 qemu_mod_timer(icount_rt_timer,
381 qemu_get_clock_ms(rt_clock) + 1000);
382 icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
383 qemu_mod_timer(icount_vm_timer,
384 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
385 }
386
387 /***********************************************************/
388 void hw_error(const char *fmt, ...)
389 {
390 va_list ap;
391 CPUState *cpu;
392
393 va_start(ap, fmt);
394 fprintf(stderr, "qemu: hardware error: ");
395 vfprintf(stderr, fmt, ap);
396 fprintf(stderr, "\n");
397 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
398 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
399 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
400 }
401 va_end(ap);
402 abort();
403 }
404
405 void cpu_synchronize_all_states(void)
406 {
407 CPUState *cpu;
408
409 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
410 cpu_synchronize_state(cpu);
411 }
412 }
413
414 void cpu_synchronize_all_post_reset(void)
415 {
416 CPUState *cpu;
417
418 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
419 cpu_synchronize_post_reset(cpu);
420 }
421 }
422
423 void cpu_synchronize_all_post_init(void)
424 {
425 CPUState *cpu;
426
427 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
428 cpu_synchronize_post_init(cpu);
429 }
430 }
431
432 bool cpu_is_stopped(CPUState *cpu)
433 {
434 return !runstate_is_running() || cpu->stopped;
435 }
436
437 static int do_vm_stop(RunState state)
438 {
439 int ret = 0;
440
441 if (runstate_is_running()) {
442 cpu_disable_ticks();
443 pause_all_vcpus();
444 runstate_set(state);
445 vm_state_notify(0, state);
446 monitor_protocol_event(QEVENT_STOP, NULL);
447 }
448
449 bdrv_drain_all();
450 ret = bdrv_flush_all();
451
452 return ret;
453 }
454
455 static bool cpu_can_run(CPUState *cpu)
456 {
457 if (cpu->stop) {
458 return false;
459 }
460 if (cpu->stopped || !runstate_is_running()) {
461 return false;
462 }
463 return true;
464 }
465
466 static void cpu_handle_guest_debug(CPUState *cpu)
467 {
468 gdb_set_stop_cpu(cpu);
469 qemu_system_debug_request();
470 cpu->stopped = true;
471 }
472
473 static void cpu_signal(int sig)
474 {
475 if (current_cpu) {
476 cpu_exit(current_cpu);
477 }
478 exit_request = 1;
479 }
480
481 #ifdef CONFIG_LINUX
482 static void sigbus_reraise(void)
483 {
484 sigset_t set;
485 struct sigaction action;
486
487 memset(&action, 0, sizeof(action));
488 action.sa_handler = SIG_DFL;
489 if (!sigaction(SIGBUS, &action, NULL)) {
490 raise(SIGBUS);
491 sigemptyset(&set);
492 sigaddset(&set, SIGBUS);
493 sigprocmask(SIG_UNBLOCK, &set, NULL);
494 }
495 perror("Failed to re-raise SIGBUS!\n");
496 abort();
497 }
498
499 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
500 void *ctx)
501 {
502 if (kvm_on_sigbus(siginfo->ssi_code,
503 (void *)(intptr_t)siginfo->ssi_addr)) {
504 sigbus_reraise();
505 }
506 }
507
508 static void qemu_init_sigbus(void)
509 {
510 struct sigaction action;
511
512 memset(&action, 0, sizeof(action));
513 action.sa_flags = SA_SIGINFO;
514 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
515 sigaction(SIGBUS, &action, NULL);
516
517 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
518 }
519
520 static void qemu_kvm_eat_signals(CPUState *cpu)
521 {
522 struct timespec ts = { 0, 0 };
523 siginfo_t siginfo;
524 sigset_t waitset;
525 sigset_t chkset;
526 int r;
527
528 sigemptyset(&waitset);
529 sigaddset(&waitset, SIG_IPI);
530 sigaddset(&waitset, SIGBUS);
531
532 do {
533 r = sigtimedwait(&waitset, &siginfo, &ts);
534 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
535 perror("sigtimedwait");
536 exit(1);
537 }
538
539 switch (r) {
540 case SIGBUS:
541 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
542 sigbus_reraise();
543 }
544 break;
545 default:
546 break;
547 }
548
549 r = sigpending(&chkset);
550 if (r == -1) {
551 perror("sigpending");
552 exit(1);
553 }
554 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
555 }
556
557 #else /* !CONFIG_LINUX */
558
/* Stub for hosts without CONFIG_LINUX: no SIGBUS setup is done. */
static void qemu_init_sigbus(void)
{
    /* nothing to do */
}
562
563 static void qemu_kvm_eat_signals(CPUState *cpu)
564 {
565 }
566 #endif /* !CONFIG_LINUX */
567
568 #ifndef _WIN32
/* Signal handler that intentionally does nothing; installed for SIG_IPI
   by qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
572
573 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
574 {
575 int r;
576 sigset_t set;
577 struct sigaction sigact;
578
579 memset(&sigact, 0, sizeof(sigact));
580 sigact.sa_handler = dummy_signal;
581 sigaction(SIG_IPI, &sigact, NULL);
582
583 pthread_sigmask(SIG_BLOCK, NULL, &set);
584 sigdelset(&set, SIG_IPI);
585 sigdelset(&set, SIGBUS);
586 r = kvm_set_signal_mask(cpu, &set);
587 if (r) {
588 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
589 exit(1);
590 }
591 }
592
593 static void qemu_tcg_init_cpu_signals(void)
594 {
595 sigset_t set;
596 struct sigaction sigact;
597
598 memset(&sigact, 0, sizeof(sigact));
599 sigact.sa_handler = cpu_signal;
600 sigaction(SIG_IPI, &sigact, NULL);
601
602 sigemptyset(&set);
603 sigaddset(&set, SIG_IPI);
604 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
605 }
606
607 #else /* _WIN32 */
608 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
609 {
610 abort();
611 }
612
/* Windows variant: no signal setup is performed. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* nothing to do */
}
616 #endif /* _WIN32 */
617
618 static QemuMutex qemu_global_mutex;
619 static QemuCond qemu_io_proceeded_cond;
620 static bool iothread_requesting_mutex;
621
622 static QemuThread io_thread;
623
624 static QemuThread *tcg_cpu_thread;
625 static QemuCond *tcg_halt_cond;
626
627 /* cpu creation */
628 static QemuCond qemu_cpu_cond;
629 /* system init */
630 static QemuCond qemu_pause_cond;
631 static QemuCond qemu_work_cond;
632
633 void qemu_init_cpu_loop(void)
634 {
635 qemu_init_sigbus();
636 qemu_cond_init(&qemu_cpu_cond);
637 qemu_cond_init(&qemu_pause_cond);
638 qemu_cond_init(&qemu_work_cond);
639 qemu_cond_init(&qemu_io_proceeded_cond);
640 qemu_mutex_init(&qemu_global_mutex);
641
642 qemu_thread_get_self(&io_thread);
643 }
644
645 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
646 {
647 struct qemu_work_item wi;
648
649 if (qemu_cpu_is_self(cpu)) {
650 func(data);
651 return;
652 }
653
654 wi.func = func;
655 wi.data = data;
656 wi.free = false;
657 if (cpu->queued_work_first == NULL) {
658 cpu->queued_work_first = &wi;
659 } else {
660 cpu->queued_work_last->next = &wi;
661 }
662 cpu->queued_work_last = &wi;
663 wi.next = NULL;
664 wi.done = false;
665
666 qemu_cpu_kick(cpu);
667 while (!wi.done) {
668 CPUState *self_cpu = current_cpu;
669
670 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
671 current_cpu = self_cpu;
672 }
673 }
674
675 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
676 {
677 struct qemu_work_item *wi;
678
679 if (qemu_cpu_is_self(cpu)) {
680 func(data);
681 return;
682 }
683
684 wi = g_malloc0(sizeof(struct qemu_work_item));
685 wi->func = func;
686 wi->data = data;
687 wi->free = true;
688 if (cpu->queued_work_first == NULL) {
689 cpu->queued_work_first = wi;
690 } else {
691 cpu->queued_work_last->next = wi;
692 }
693 cpu->queued_work_last = wi;
694 wi->next = NULL;
695 wi->done = false;
696
697 qemu_cpu_kick(cpu);
698 }
699
700 static void flush_queued_work(CPUState *cpu)
701 {
702 struct qemu_work_item *wi;
703
704 if (cpu->queued_work_first == NULL) {
705 return;
706 }
707
708 while ((wi = cpu->queued_work_first)) {
709 cpu->queued_work_first = wi->next;
710 wi->func(wi->data);
711 wi->done = true;
712 if (wi->free) {
713 g_free(wi);
714 }
715 }
716 cpu->queued_work_last = NULL;
717 qemu_cond_broadcast(&qemu_work_cond);
718 }
719
720 static void qemu_wait_io_event_common(CPUState *cpu)
721 {
722 if (cpu->stop) {
723 cpu->stop = false;
724 cpu->stopped = true;
725 qemu_cond_signal(&qemu_pause_cond);
726 }
727 flush_queued_work(cpu);
728 cpu->thread_kicked = false;
729 }
730
731 static void qemu_tcg_wait_io_event(void)
732 {
733 CPUState *cpu;
734
735 while (all_cpu_threads_idle()) {
736 /* Start accounting real time to the virtual clock if the CPUs
737 are idle. */
738 qemu_clock_warp(vm_clock);
739 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
740 }
741
742 while (iothread_requesting_mutex) {
743 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
744 }
745
746 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
747 qemu_wait_io_event_common(cpu);
748 }
749 }
750
751 static void qemu_kvm_wait_io_event(CPUState *cpu)
752 {
753 while (cpu_thread_is_idle(cpu)) {
754 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
755 }
756
757 qemu_kvm_eat_signals(cpu);
758 qemu_wait_io_event_common(cpu);
759 }
760
761 static void *qemu_kvm_cpu_thread_fn(void *arg)
762 {
763 CPUState *cpu = arg;
764 int r;
765
766 qemu_mutex_lock(&qemu_global_mutex);
767 qemu_thread_get_self(cpu->thread);
768 cpu->thread_id = qemu_get_thread_id();
769 current_cpu = cpu;
770
771 r = kvm_init_vcpu(cpu);
772 if (r < 0) {
773 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
774 exit(1);
775 }
776
777 qemu_kvm_init_cpu_signals(cpu);
778
779 /* signal CPU creation */
780 cpu->created = true;
781 qemu_cond_signal(&qemu_cpu_cond);
782
783 while (1) {
784 if (cpu_can_run(cpu)) {
785 r = kvm_cpu_exec(cpu);
786 if (r == EXCP_DEBUG) {
787 cpu_handle_guest_debug(cpu);
788 }
789 }
790 qemu_kvm_wait_io_event(cpu);
791 }
792
793 return NULL;
794 }
795
796 static void *qemu_dummy_cpu_thread_fn(void *arg)
797 {
798 #ifdef _WIN32
799 fprintf(stderr, "qtest is not supported under Windows\n");
800 exit(1);
801 #else
802 CPUState *cpu = arg;
803 sigset_t waitset;
804 int r;
805
806 qemu_mutex_lock_iothread();
807 qemu_thread_get_self(cpu->thread);
808 cpu->thread_id = qemu_get_thread_id();
809
810 sigemptyset(&waitset);
811 sigaddset(&waitset, SIG_IPI);
812
813 /* signal CPU creation */
814 cpu->created = true;
815 qemu_cond_signal(&qemu_cpu_cond);
816
817 current_cpu = cpu;
818 while (1) {
819 current_cpu = NULL;
820 qemu_mutex_unlock_iothread();
821 do {
822 int sig;
823 r = sigwait(&waitset, &sig);
824 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
825 if (r == -1) {
826 perror("sigwait");
827 exit(1);
828 }
829 qemu_mutex_lock_iothread();
830 current_cpu = cpu;
831 qemu_wait_io_event_common(cpu);
832 }
833
834 return NULL;
835 #endif
836 }
837
838 static void tcg_exec_all(void);
839
840 static void tcg_signal_cpu_creation(CPUState *cpu, void *data)
841 {
842 cpu->thread_id = qemu_get_thread_id();
843 cpu->created = true;
844 }
845
846 static void *qemu_tcg_cpu_thread_fn(void *arg)
847 {
848 CPUState *cpu = arg;
849
850 qemu_tcg_init_cpu_signals();
851 qemu_thread_get_self(cpu->thread);
852
853 qemu_mutex_lock(&qemu_global_mutex);
854 qemu_for_each_cpu(tcg_signal_cpu_creation, NULL);
855 qemu_cond_signal(&qemu_cpu_cond);
856
857 /* wait for initial kick-off after machine start */
858 while (first_cpu->stopped) {
859 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
860
861 /* process any pending work */
862 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
863 qemu_wait_io_event_common(cpu);
864 }
865 }
866
867 while (1) {
868 tcg_exec_all();
869 if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
870 qemu_notify_event();
871 }
872 qemu_tcg_wait_io_event();
873 }
874
875 return NULL;
876 }
877
878 static void qemu_cpu_kick_thread(CPUState *cpu)
879 {
880 #ifndef _WIN32
881 int err;
882
883 err = pthread_kill(cpu->thread->thread, SIG_IPI);
884 if (err) {
885 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
886 exit(1);
887 }
888 #else /* _WIN32 */
889 if (!qemu_cpu_is_self(cpu)) {
890 CONTEXT tcgContext;
891
892 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
893 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
894 GetLastError());
895 exit(1);
896 }
897
898 /* On multi-core systems, we are not sure that the thread is actually
899 * suspended until we can get the context.
900 */
901 tcgContext.ContextFlags = CONTEXT_CONTROL;
902 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
903 continue;
904 }
905
906 cpu_signal(0);
907
908 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
909 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
910 GetLastError());
911 exit(1);
912 }
913 }
914 #endif
915 }
916
917 void qemu_cpu_kick(CPUState *cpu)
918 {
919 qemu_cond_broadcast(cpu->halt_cond);
920 if (!tcg_enabled() && !cpu->thread_kicked) {
921 qemu_cpu_kick_thread(cpu);
922 cpu->thread_kicked = true;
923 }
924 }
925
926 void qemu_cpu_kick_self(void)
927 {
928 #ifndef _WIN32
929 assert(current_cpu);
930
931 if (!current_cpu->thread_kicked) {
932 qemu_cpu_kick_thread(current_cpu);
933 current_cpu->thread_kicked = true;
934 }
935 #else
936 abort();
937 #endif
938 }
939
940 bool qemu_cpu_is_self(CPUState *cpu)
941 {
942 return qemu_thread_is_self(cpu->thread);
943 }
944
945 static bool qemu_in_vcpu_thread(void)
946 {
947 return current_cpu && qemu_cpu_is_self(current_cpu);
948 }
949
950 void qemu_mutex_lock_iothread(void)
951 {
952 if (!tcg_enabled()) {
953 qemu_mutex_lock(&qemu_global_mutex);
954 } else {
955 iothread_requesting_mutex = true;
956 if (qemu_mutex_trylock(&qemu_global_mutex)) {
957 qemu_cpu_kick_thread(first_cpu);
958 qemu_mutex_lock(&qemu_global_mutex);
959 }
960 iothread_requesting_mutex = false;
961 qemu_cond_broadcast(&qemu_io_proceeded_cond);
962 }
963 }
964
965 void qemu_mutex_unlock_iothread(void)
966 {
967 qemu_mutex_unlock(&qemu_global_mutex);
968 }
969
970 static int all_vcpus_paused(void)
971 {
972 CPUState *cpu = first_cpu;
973
974 while (cpu) {
975 if (!cpu->stopped) {
976 return 0;
977 }
978 cpu = cpu->next_cpu;
979 }
980
981 return 1;
982 }
983
984 void pause_all_vcpus(void)
985 {
986 CPUState *cpu = first_cpu;
987
988 qemu_clock_enable(vm_clock, false);
989 while (cpu) {
990 cpu->stop = true;
991 qemu_cpu_kick(cpu);
992 cpu = cpu->next_cpu;
993 }
994
995 if (qemu_in_vcpu_thread()) {
996 cpu_stop_current();
997 if (!kvm_enabled()) {
998 cpu = first_cpu;
999 while (cpu) {
1000 cpu->stop = false;
1001 cpu->stopped = true;
1002 cpu = cpu->next_cpu;
1003 }
1004 return;
1005 }
1006 }
1007
1008 while (!all_vcpus_paused()) {
1009 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1010 cpu = first_cpu;
1011 while (cpu) {
1012 qemu_cpu_kick(cpu);
1013 cpu = cpu->next_cpu;
1014 }
1015 }
1016 }
1017
1018 void cpu_resume(CPUState *cpu)
1019 {
1020 cpu->stop = false;
1021 cpu->stopped = false;
1022 qemu_cpu_kick(cpu);
1023 }
1024
1025 void resume_all_vcpus(void)
1026 {
1027 CPUState *cpu = first_cpu;
1028
1029 qemu_clock_enable(vm_clock, true);
1030 while (cpu) {
1031 cpu_resume(cpu);
1032 cpu = cpu->next_cpu;
1033 }
1034 }
1035
1036 static void qemu_tcg_init_vcpu(CPUState *cpu)
1037 {
1038 /* share a single thread for all cpus with TCG */
1039 if (!tcg_cpu_thread) {
1040 cpu->thread = g_malloc0(sizeof(QemuThread));
1041 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1042 qemu_cond_init(cpu->halt_cond);
1043 tcg_halt_cond = cpu->halt_cond;
1044 qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
1045 QEMU_THREAD_JOINABLE);
1046 #ifdef _WIN32
1047 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1048 #endif
1049 while (!cpu->created) {
1050 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1051 }
1052 tcg_cpu_thread = cpu->thread;
1053 } else {
1054 cpu->thread = tcg_cpu_thread;
1055 cpu->halt_cond = tcg_halt_cond;
1056 }
1057 }
1058
1059 static void qemu_kvm_start_vcpu(CPUState *cpu)
1060 {
1061 cpu->thread = g_malloc0(sizeof(QemuThread));
1062 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1063 qemu_cond_init(cpu->halt_cond);
1064 qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
1065 QEMU_THREAD_JOINABLE);
1066 while (!cpu->created) {
1067 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1068 }
1069 }
1070
1071 static void qemu_dummy_start_vcpu(CPUState *cpu)
1072 {
1073 cpu->thread = g_malloc0(sizeof(QemuThread));
1074 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1075 qemu_cond_init(cpu->halt_cond);
1076 qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, cpu,
1077 QEMU_THREAD_JOINABLE);
1078 while (!cpu->created) {
1079 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1080 }
1081 }
1082
1083 void qemu_init_vcpu(CPUState *cpu)
1084 {
1085 cpu->nr_cores = smp_cores;
1086 cpu->nr_threads = smp_threads;
1087 cpu->stopped = true;
1088 if (kvm_enabled()) {
1089 qemu_kvm_start_vcpu(cpu);
1090 } else if (tcg_enabled()) {
1091 qemu_tcg_init_vcpu(cpu);
1092 } else {
1093 qemu_dummy_start_vcpu(cpu);
1094 }
1095 }
1096
1097 void cpu_stop_current(void)
1098 {
1099 if (current_cpu) {
1100 current_cpu->stop = false;
1101 current_cpu->stopped = true;
1102 cpu_exit(current_cpu);
1103 qemu_cond_signal(&qemu_pause_cond);
1104 }
1105 }
1106
1107 int vm_stop(RunState state)
1108 {
1109 if (qemu_in_vcpu_thread()) {
1110 qemu_system_vmstop_request(state);
1111 /*
1112 * FIXME: should not return to device code in case
1113 * vm_stop() has been requested.
1114 */
1115 cpu_stop_current();
1116 return 0;
1117 }
1118
1119 return do_vm_stop(state);
1120 }
1121
1122 /* does a state transition even if the VM is already stopped,
1123 current state is forgotten forever */
1124 int vm_stop_force_state(RunState state)
1125 {
1126 if (runstate_is_running()) {
1127 return vm_stop(state);
1128 } else {
1129 runstate_set(state);
1130 /* Make sure to return an error if the flush in a previous vm_stop()
1131 * failed. */
1132 return bdrv_flush_all();
1133 }
1134 }
1135
1136 static int tcg_cpu_exec(CPUArchState *env)
1137 {
1138 int ret;
1139 #ifdef CONFIG_PROFILER
1140 int64_t ti;
1141 #endif
1142
1143 #ifdef CONFIG_PROFILER
1144 ti = profile_getclock();
1145 #endif
1146 if (use_icount) {
1147 int64_t count;
1148 int decr;
1149 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
1150 env->icount_decr.u16.low = 0;
1151 env->icount_extra = 0;
1152 count = qemu_icount_round(qemu_clock_deadline(vm_clock));
1153 qemu_icount += count;
1154 decr = (count > 0xffff) ? 0xffff : count;
1155 count -= decr;
1156 env->icount_decr.u16.low = decr;
1157 env->icount_extra = count;
1158 }
1159 ret = cpu_exec(env);
1160 #ifdef CONFIG_PROFILER
1161 qemu_time += profile_getclock() - ti;
1162 #endif
1163 if (use_icount) {
1164 /* Fold pending instructions back into the
1165 instruction counter, and clear the interrupt flag. */
1166 qemu_icount -= (env->icount_decr.u16.low
1167 + env->icount_extra);
1168 env->icount_decr.u32 = 0;
1169 env->icount_extra = 0;
1170 }
1171 return ret;
1172 }
1173
1174 static void tcg_exec_all(void)
1175 {
1176 int r;
1177
1178 /* Account partial waits to the vm_clock. */
1179 qemu_clock_warp(vm_clock);
1180
1181 if (next_cpu == NULL) {
1182 next_cpu = first_cpu;
1183 }
1184 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1185 CPUState *cpu = next_cpu;
1186 CPUArchState *env = cpu->env_ptr;
1187
1188 qemu_clock_enable(vm_clock,
1189 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1190
1191 if (cpu_can_run(cpu)) {
1192 r = tcg_cpu_exec(env);
1193 if (r == EXCP_DEBUG) {
1194 cpu_handle_guest_debug(cpu);
1195 break;
1196 }
1197 } else if (cpu->stop || cpu->stopped) {
1198 break;
1199 }
1200 }
1201 exit_request = 0;
1202 }
1203
1204 void set_numa_modes(void)
1205 {
1206 CPUState *cpu;
1207 int i;
1208
1209 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
1210 for (i = 0; i < nb_numa_nodes; i++) {
1211 if (test_bit(cpu->cpu_index, node_cpumask[i])) {
1212 cpu->numa_node = i;
1213 }
1214 }
1215 }
1216 }
1217
1218 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1219 {
1220 /* XXX: implement xxx_cpu_list for targets that still miss it */
1221 #if defined(cpu_list)
1222 cpu_list(f, cpu_fprintf);
1223 #endif
1224 }
1225
1226 CpuInfoList *qmp_query_cpus(Error **errp)
1227 {
1228 CpuInfoList *head = NULL, *cur_item = NULL;
1229 CPUState *cpu;
1230
1231 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
1232 CpuInfoList *info;
1233 #if defined(TARGET_I386)
1234 X86CPU *x86_cpu = X86_CPU(cpu);
1235 CPUX86State *env = &x86_cpu->env;
1236 #elif defined(TARGET_PPC)
1237 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1238 CPUPPCState *env = &ppc_cpu->env;
1239 #elif defined(TARGET_SPARC)
1240 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1241 CPUSPARCState *env = &sparc_cpu->env;
1242 #elif defined(TARGET_MIPS)
1243 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1244 CPUMIPSState *env = &mips_cpu->env;
1245 #endif
1246
1247 cpu_synchronize_state(cpu);
1248
1249 info = g_malloc0(sizeof(*info));
1250 info->value = g_malloc0(sizeof(*info->value));
1251 info->value->CPU = cpu->cpu_index;
1252 info->value->current = (cpu == first_cpu);
1253 info->value->halted = cpu->halted;
1254 info->value->thread_id = cpu->thread_id;
1255 #if defined(TARGET_I386)
1256 info->value->has_pc = true;
1257 info->value->pc = env->eip + env->segs[R_CS].base;
1258 #elif defined(TARGET_PPC)
1259 info->value->has_nip = true;
1260 info->value->nip = env->nip;
1261 #elif defined(TARGET_SPARC)
1262 info->value->has_pc = true;
1263 info->value->pc = env->pc;
1264 info->value->has_npc = true;
1265 info->value->npc = env->npc;
1266 #elif defined(TARGET_MIPS)
1267 info->value->has_PC = true;
1268 info->value->PC = env->active_tc.PC;
1269 #endif
1270
1271 /* XXX: waiting for the qapi to support GSList */
1272 if (!cur_item) {
1273 head = cur_item = info;
1274 } else {
1275 cur_item->next = info;
1276 cur_item = info;
1277 }
1278 }
1279
1280 return head;
1281 }
1282
1283 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1284 bool has_cpu, int64_t cpu_index, Error **errp)
1285 {
1286 FILE *f;
1287 uint32_t l;
1288 CPUState *cpu;
1289 uint8_t buf[1024];
1290
1291 if (!has_cpu) {
1292 cpu_index = 0;
1293 }
1294
1295 cpu = qemu_get_cpu(cpu_index);
1296 if (cpu == NULL) {
1297 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1298 "a CPU number");
1299 return;
1300 }
1301
1302 f = fopen(filename, "wb");
1303 if (!f) {
1304 error_setg_file_open(errp, errno, filename);
1305 return;
1306 }
1307
1308 while (size != 0) {
1309 l = sizeof(buf);
1310 if (l > size)
1311 l = size;
1312 cpu_memory_rw_debug(cpu, addr, buf, l, 0);
1313 if (fwrite(buf, 1, l, f) != l) {
1314 error_set(errp, QERR_IO_ERROR);
1315 goto exit;
1316 }
1317 addr += l;
1318 size -= l;
1319 }
1320
1321 exit:
1322 fclose(f);
1323 }
1324
1325 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1326 Error **errp)
1327 {
1328 FILE *f;
1329 uint32_t l;
1330 uint8_t buf[1024];
1331
1332 f = fopen(filename, "wb");
1333 if (!f) {
1334 error_setg_file_open(errp, errno, filename);
1335 return;
1336 }
1337
1338 while (size != 0) {
1339 l = sizeof(buf);
1340 if (l > size)
1341 l = size;
1342 cpu_physical_memory_rw(addr, buf, l, 0);
1343 if (fwrite(buf, 1, l, f) != l) {
1344 error_set(errp, QERR_IO_ERROR);
1345 goto exit;
1346 }
1347 addr += l;
1348 size -= l;
1349 }
1350
1351 exit:
1352 fclose(f);
1353 }
1354
1355 void qmp_inject_nmi(Error **errp)
1356 {
1357 #if defined(TARGET_I386)
1358 CPUState *cs;
1359
1360 for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) {
1361 X86CPU *cpu = X86_CPU(cs);
1362 CPUX86State *env = &cpu->env;
1363
1364 if (!env->apic_state) {
1365 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1366 } else {
1367 apic_deliver_nmi(env->apic_state);
1368 }
1369 }
1370 #else
1371 error_set(errp, QERR_UNSUPPORTED);
1372 #endif
1373 }