]> git.proxmox.com Git - qemu.git/blob - cpus.c
Merge remote-tracking branch 'quintela/migration.next' into staging
[qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
27
28 #include "monitor/monitor.h"
29 #include "sysemu/sysemu.h"
30 #include "exec/gdbstub.h"
31 #include "sysemu/dma.h"
32 #include "sysemu/kvm.h"
33 #include "qmp-commands.h"
34
35 #include "qemu/thread.h"
36 #include "sysemu/cpus.h"
37 #include "sysemu/qtest.h"
38 #include "qemu/main-loop.h"
39 #include "qemu/bitmap.h"
40
41 #ifndef _WIN32
42 #include "qemu/compatfd.h"
43 #endif
44
45 #ifdef CONFIG_LINUX
46
47 #include <sys/prctl.h>
48
/*
 * Fallback values for hosts whose <sys/prctl.h> does not provide the
 * PR_MCE_KILL constants; they are passed to prctl() in qemu_init_sigbus().
 */
#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif
60
61 #endif /* CONFIG_LINUX */
62
/* Cursor into the CPU list: the CPU tcg_exec_all() will run next.  It is
 * reset to first_cpu whenever it reaches the end of the list. */
static CPUState *next_cpu;
64
65 static bool cpu_thread_is_idle(CPUState *cpu)
66 {
67 if (cpu->stop || cpu->queued_work_first) {
68 return false;
69 }
70 if (cpu->stopped || !runstate_is_running()) {
71 return true;
72 }
73 if (!cpu->halted || qemu_cpu_has_work(cpu) ||
74 kvm_halt_in_kernel()) {
75 return false;
76 }
77 return true;
78 }
79
80 static bool all_cpu_threads_idle(void)
81 {
82 CPUState *cpu;
83
84 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
85 if (!cpu_thread_is_idle(cpu)) {
86 return false;
87 }
88 }
89 return true;
90 }
91
92 /***********************************************************/
93 /* guest cycle counter */
94
/* Conversion factor from emulated instructions to virtual clock ticks:
 * cpu_get_icount() shifts the instruction count left by this amount. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  This is the
 * upper limit icount_adjust() applies when raising icount_time_shift. */
#define MAX_ICOUNT_SHIFT 10
/* Compensate for varying guest execution speed: offset added to the
 * shifted instruction count in cpu_get_icount(). */
static int64_t qemu_icount_bias;
/* rt_clock timer that runs icount_adjust_rt() (adaptive mode only). */
static QEMUTimer *icount_rt_timer;
/* vm_clock timer that runs icount_adjust_vm() (adaptive mode only). */
static QEMUTimer *icount_vm_timer;
/* rt_clock timer that runs icount_warp_rt(). */
static QEMUTimer *icount_warp_timer;
/* rt_clock time (ns) at which the current warp started, or -1 when no
 * warp is pending. */
static int64_t vm_clock_warp_start;
/* Instruction counter; tcg_cpu_exec() adds each execution budget up front
 * and subtracts whatever was left unexecuted afterwards. */
static int64_t qemu_icount;
106
/* Bookkeeping for the host-tick and host-clock based guest timers. */
typedef struct TimersState {
    /* Last raw value read by cpu_get_ticks(); used to detect the host
     * counter going backwards. */
    int64_t cpu_ticks_prev;
    /* Offset added to the raw tick counter while enabled; holds the frozen
     * tick value while disabled. */
    int64_t cpu_ticks_offset;
    /* Same as cpu_ticks_offset, but for get_clock(). */
    int64_t cpu_clock_offset;
    /* Non-zero while ticks and clock are running. */
    int32_t cpu_ticks_enabled;
    /* Not read or written by the code in this file apart from being listed
     * in vmstate_timers -- presumably kept for migration stream
     * compatibility; TODO confirm. */
    int64_t dummy;
} TimersState;

/* The single, global instance of the timer bookkeeping. */
TimersState timers_state;
116
117 /* Return the virtual CPU time, based on the instruction counter. */
118 int64_t cpu_get_icount(void)
119 {
120 int64_t icount;
121 CPUState *cpu = current_cpu;
122
123 icount = qemu_icount;
124 if (cpu) {
125 CPUArchState *env = cpu->env_ptr;
126 if (!can_do_io(env)) {
127 fprintf(stderr, "Bad clock read\n");
128 }
129 icount -= (env->icount_decr.u16.low + env->icount_extra);
130 }
131 return qemu_icount_bias + (icount << icount_time_shift);
132 }
133
134 /* return the host CPU cycle counter and handle stop/restart */
135 int64_t cpu_get_ticks(void)
136 {
137 if (use_icount) {
138 return cpu_get_icount();
139 }
140 if (!timers_state.cpu_ticks_enabled) {
141 return timers_state.cpu_ticks_offset;
142 } else {
143 int64_t ticks;
144 ticks = cpu_get_real_ticks();
145 if (timers_state.cpu_ticks_prev > ticks) {
146 /* Note: non increasing ticks may happen if the host uses
147 software suspend */
148 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
149 }
150 timers_state.cpu_ticks_prev = ticks;
151 return ticks + timers_state.cpu_ticks_offset;
152 }
153 }
154
155 /* return the host CPU monotonic timer and handle stop/restart */
156 int64_t cpu_get_clock(void)
157 {
158 int64_t ti;
159 if (!timers_state.cpu_ticks_enabled) {
160 return timers_state.cpu_clock_offset;
161 } else {
162 ti = get_clock();
163 return ti + timers_state.cpu_clock_offset;
164 }
165 }
166
167 /* enable cpu_get_ticks() */
168 void cpu_enable_ticks(void)
169 {
170 if (!timers_state.cpu_ticks_enabled) {
171 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
172 timers_state.cpu_clock_offset -= get_clock();
173 timers_state.cpu_ticks_enabled = 1;
174 }
175 }
176
177 /* disable cpu_get_ticks() : the clock is stopped. You must not call
178 cpu_get_ticks() after that. */
179 void cpu_disable_ticks(void)
180 {
181 if (timers_state.cpu_ticks_enabled) {
182 timers_state.cpu_ticks_offset = cpu_get_ticks();
183 timers_state.cpu_clock_offset = cpu_get_clock();
184 timers_state.cpu_ticks_enabled = 0;
185 }
186 }
187
/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.
   The tolerance is one tenth of get_ticks_per_sec(); icount_adjust()
   uses it as the dead band before changing icount_time_shift.  */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
193
194 static void icount_adjust(void)
195 {
196 int64_t cur_time;
197 int64_t cur_icount;
198 int64_t delta;
199 static int64_t last_delta;
200 /* If the VM is not running, then do nothing. */
201 if (!runstate_is_running()) {
202 return;
203 }
204 cur_time = cpu_get_clock();
205 cur_icount = qemu_get_clock_ns(vm_clock);
206 delta = cur_icount - cur_time;
207 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
208 if (delta > 0
209 && last_delta + ICOUNT_WOBBLE < delta * 2
210 && icount_time_shift > 0) {
211 /* The guest is getting too far ahead. Slow time down. */
212 icount_time_shift--;
213 }
214 if (delta < 0
215 && last_delta - ICOUNT_WOBBLE > delta * 2
216 && icount_time_shift < MAX_ICOUNT_SHIFT) {
217 /* The guest is getting too far behind. Speed time up. */
218 icount_time_shift++;
219 }
220 last_delta = delta;
221 qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
222 }
223
224 static void icount_adjust_rt(void *opaque)
225 {
226 qemu_mod_timer(icount_rt_timer,
227 qemu_get_clock_ms(rt_clock) + 1000);
228 icount_adjust();
229 }
230
231 static void icount_adjust_vm(void *opaque)
232 {
233 qemu_mod_timer(icount_vm_timer,
234 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
235 icount_adjust();
236 }
237
238 static int64_t qemu_icount_round(int64_t count)
239 {
240 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
241 }
242
243 static void icount_warp_rt(void *opaque)
244 {
245 if (vm_clock_warp_start == -1) {
246 return;
247 }
248
249 if (runstate_is_running()) {
250 int64_t clock = qemu_get_clock_ns(rt_clock);
251 int64_t warp_delta = clock - vm_clock_warp_start;
252 if (use_icount == 1) {
253 qemu_icount_bias += warp_delta;
254 } else {
255 /*
256 * In adaptive mode, do not let the vm_clock run too
257 * far ahead of real time.
258 */
259 int64_t cur_time = cpu_get_clock();
260 int64_t cur_icount = qemu_get_clock_ns(vm_clock);
261 int64_t delta = cur_time - cur_icount;
262 qemu_icount_bias += MIN(warp_delta, delta);
263 }
264 if (qemu_clock_expired(vm_clock)) {
265 qemu_notify_event();
266 }
267 }
268 vm_clock_warp_start = -1;
269 }
270
271 void qtest_clock_warp(int64_t dest)
272 {
273 int64_t clock = qemu_get_clock_ns(vm_clock);
274 assert(qtest_enabled());
275 while (clock < dest) {
276 int64_t deadline = qemu_clock_deadline(vm_clock);
277 int64_t warp = MIN(dest - clock, deadline);
278 qemu_icount_bias += warp;
279 qemu_run_timers(vm_clock);
280 clock = qemu_get_clock_ns(vm_clock);
281 }
282 qemu_notify_event();
283 }
284
285 void qemu_clock_warp(QEMUClock *clock)
286 {
287 int64_t deadline;
288
289 /*
290 * There are too many global variables to make the "warp" behavior
291 * applicable to other clocks. But a clock argument removes the
292 * need for if statements all over the place.
293 */
294 if (clock != vm_clock || !use_icount) {
295 return;
296 }
297
298 /*
299 * If the CPUs have been sleeping, advance the vm_clock timer now. This
300 * ensures that the deadline for the timer is computed correctly below.
301 * This also makes sure that the insn counter is synchronized before the
302 * CPU starts running, in case the CPU is woken by an event other than
303 * the earliest vm_clock timer.
304 */
305 icount_warp_rt(NULL);
306 if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
307 qemu_del_timer(icount_warp_timer);
308 return;
309 }
310
311 if (qtest_enabled()) {
312 /* When testing, qtest commands advance icount. */
313 return;
314 }
315
316 vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
317 deadline = qemu_clock_deadline(vm_clock);
318 if (deadline > 0) {
319 /*
320 * Ensure the vm_clock proceeds even when the virtual CPU goes to
321 * sleep. Otherwise, the CPU might be waiting for a future timer
322 * interrupt to wake it up, but the interrupt never comes because
323 * the vCPU isn't running any insns and thus doesn't advance the
324 * vm_clock.
325 *
326 * An extreme solution for this problem would be to never let VCPUs
327 * sleep in icount mode if there is a pending vm_clock timer; rather
328 * time could just advance to the next vm_clock event. Instead, we
329 * do stop VCPUs and only advance vm_clock after some "real" time,
330 * (related to the time left until the next event) has passed. This
331 * rt_clock timer will do this. This avoids that the warps are too
332 * visible externally---for example, you will not be sending network
333 * packets continuously instead of every 100ms.
334 */
335 qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
336 } else {
337 qemu_notify_event();
338 }
339 }
340
/*
 * VMState description for timers_state, registered by configure_icount().
 * cpu_ticks_offset and dummy are listed unconditionally; cpu_clock_offset
 * is tagged with version 2.
 */
static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    }
};
353
354 void configure_icount(const char *option)
355 {
356 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
357 if (!option) {
358 return;
359 }
360
361 icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
362 if (strcmp(option, "auto") != 0) {
363 icount_time_shift = strtol(option, NULL, 0);
364 use_icount = 1;
365 return;
366 }
367
368 use_icount = 2;
369
370 /* 125MIPS seems a reasonable initial guess at the guest speed.
371 It will be corrected fairly quickly anyway. */
372 icount_time_shift = 3;
373
374 /* Have both realtime and virtual time triggers for speed adjustment.
375 The realtime trigger catches emulated time passing too slowly,
376 the virtual time trigger catches emulated time passing too fast.
377 Realtime triggers occur even when idle, so use them less frequently
378 than VM triggers. */
379 icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
380 qemu_mod_timer(icount_rt_timer,
381 qemu_get_clock_ms(rt_clock) + 1000);
382 icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
383 qemu_mod_timer(icount_vm_timer,
384 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
385 }
386
387 /***********************************************************/
388 void hw_error(const char *fmt, ...)
389 {
390 va_list ap;
391 CPUState *cpu;
392
393 va_start(ap, fmt);
394 fprintf(stderr, "qemu: hardware error: ");
395 vfprintf(stderr, fmt, ap);
396 fprintf(stderr, "\n");
397 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
398 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
399 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
400 }
401 va_end(ap);
402 abort();
403 }
404
405 void cpu_synchronize_all_states(void)
406 {
407 CPUState *cpu;
408
409 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
410 cpu_synchronize_state(cpu);
411 }
412 }
413
414 void cpu_synchronize_all_post_reset(void)
415 {
416 CPUState *cpu;
417
418 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
419 cpu_synchronize_post_reset(cpu);
420 }
421 }
422
423 void cpu_synchronize_all_post_init(void)
424 {
425 CPUState *cpu;
426
427 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
428 cpu_synchronize_post_init(cpu);
429 }
430 }
431
432 bool cpu_is_stopped(CPUState *cpu)
433 {
434 return !runstate_is_running() || cpu->stopped;
435 }
436
437 static int do_vm_stop(RunState state)
438 {
439 int ret = 0;
440
441 if (runstate_is_running()) {
442 cpu_disable_ticks();
443 pause_all_vcpus();
444 runstate_set(state);
445 vm_state_notify(0, state);
446 bdrv_drain_all();
447 ret = bdrv_flush_all();
448 monitor_protocol_event(QEVENT_STOP, NULL);
449 }
450
451 return ret;
452 }
453
454 static bool cpu_can_run(CPUState *cpu)
455 {
456 if (cpu->stop) {
457 return false;
458 }
459 if (cpu->stopped || !runstate_is_running()) {
460 return false;
461 }
462 return true;
463 }
464
465 static void cpu_handle_guest_debug(CPUState *cpu)
466 {
467 gdb_set_stop_cpu(cpu);
468 qemu_system_debug_request();
469 cpu->stopped = true;
470 }
471
472 static void cpu_signal(int sig)
473 {
474 if (current_cpu) {
475 cpu_exit(current_cpu);
476 }
477 exit_request = 1;
478 }
479
480 #ifdef CONFIG_LINUX
481 static void sigbus_reraise(void)
482 {
483 sigset_t set;
484 struct sigaction action;
485
486 memset(&action, 0, sizeof(action));
487 action.sa_handler = SIG_DFL;
488 if (!sigaction(SIGBUS, &action, NULL)) {
489 raise(SIGBUS);
490 sigemptyset(&set);
491 sigaddset(&set, SIGBUS);
492 sigprocmask(SIG_UNBLOCK, &set, NULL);
493 }
494 perror("Failed to re-raise SIGBUS!\n");
495 abort();
496 }
497
498 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
499 void *ctx)
500 {
501 if (kvm_on_sigbus(siginfo->ssi_code,
502 (void *)(intptr_t)siginfo->ssi_addr)) {
503 sigbus_reraise();
504 }
505 }
506
507 static void qemu_init_sigbus(void)
508 {
509 struct sigaction action;
510
511 memset(&action, 0, sizeof(action));
512 action.sa_flags = SA_SIGINFO;
513 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
514 sigaction(SIGBUS, &action, NULL);
515
516 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
517 }
518
519 static void qemu_kvm_eat_signals(CPUState *cpu)
520 {
521 struct timespec ts = { 0, 0 };
522 siginfo_t siginfo;
523 sigset_t waitset;
524 sigset_t chkset;
525 int r;
526
527 sigemptyset(&waitset);
528 sigaddset(&waitset, SIG_IPI);
529 sigaddset(&waitset, SIGBUS);
530
531 do {
532 r = sigtimedwait(&waitset, &siginfo, &ts);
533 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
534 perror("sigtimedwait");
535 exit(1);
536 }
537
538 switch (r) {
539 case SIGBUS:
540 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
541 sigbus_reraise();
542 }
543 break;
544 default:
545 break;
546 }
547
548 r = sigpending(&chkset);
549 if (r == -1) {
550 perror("sigpending");
551 exit(1);
552 }
553 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
554 }
555
556 #else /* !CONFIG_LINUX */
557
/* Non-Linux hosts: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
    /* Intentionally empty. */
}
561
562 static void qemu_kvm_eat_signals(CPUState *cpu)
563 {
564 }
565 #endif /* !CONFIG_LINUX */
566
567 #ifndef _WIN32
/* Signal handler that does nothing; installed for SIG_IPI by
 * qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
    /* Intentionally empty. */
}
571
572 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
573 {
574 int r;
575 sigset_t set;
576 struct sigaction sigact;
577
578 memset(&sigact, 0, sizeof(sigact));
579 sigact.sa_handler = dummy_signal;
580 sigaction(SIG_IPI, &sigact, NULL);
581
582 pthread_sigmask(SIG_BLOCK, NULL, &set);
583 sigdelset(&set, SIG_IPI);
584 sigdelset(&set, SIGBUS);
585 r = kvm_set_signal_mask(cpu, &set);
586 if (r) {
587 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
588 exit(1);
589 }
590 }
591
592 static void qemu_tcg_init_cpu_signals(void)
593 {
594 sigset_t set;
595 struct sigaction sigact;
596
597 memset(&sigact, 0, sizeof(sigact));
598 sigact.sa_handler = cpu_signal;
599 sigaction(SIG_IPI, &sigact, NULL);
600
601 sigemptyset(&set);
602 sigaddset(&set, SIG_IPI);
603 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
604 }
605
606 #else /* _WIN32 */
607 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
608 {
609 abort();
610 }
611
/* Windows build: no signal setup is performed for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* Intentionally empty. */
}
615 #endif /* _WIN32 */
616
/* The global lock taken by qemu_mutex_lock_iothread() and by the vCPU
 * threads; every condition variable below is waited on with it. */
static QemuMutex qemu_global_mutex;
/* Broadcast by qemu_mutex_lock_iothread() once it owns the mutex; the TCG
 * thread waits on it while iothread_requesting_mutex is set. */
static QemuCond qemu_io_proceeded_cond;
/* True while qemu_mutex_lock_iothread() is trying to take the mutex in
 * TCG mode. */
static bool iothread_requesting_mutex;

/* The thread that called qemu_init_cpu_loop(). */
static QemuThread io_thread;

/* Thread and halt condition shared by all vCPUs under TCG; set up by
 * qemu_tcg_init_vcpu() for the first CPU. */
static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation: signalled by a vCPU thread once it has set cpu->created */
static QemuCond qemu_cpu_cond;
/* signalled when a vCPU becomes stopped; pause_all_vcpus() waits on it */
static QemuCond qemu_pause_cond;
/* broadcast by flush_queued_work(); run_on_cpu() waits on it */
static QemuCond qemu_work_cond;
631
632 void qemu_init_cpu_loop(void)
633 {
634 qemu_init_sigbus();
635 qemu_cond_init(&qemu_cpu_cond);
636 qemu_cond_init(&qemu_pause_cond);
637 qemu_cond_init(&qemu_work_cond);
638 qemu_cond_init(&qemu_io_proceeded_cond);
639 qemu_mutex_init(&qemu_global_mutex);
640
641 qemu_thread_get_self(&io_thread);
642 }
643
644 void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
645 {
646 struct qemu_work_item wi;
647
648 if (qemu_cpu_is_self(cpu)) {
649 func(data);
650 return;
651 }
652
653 wi.func = func;
654 wi.data = data;
655 wi.free = false;
656 if (cpu->queued_work_first == NULL) {
657 cpu->queued_work_first = &wi;
658 } else {
659 cpu->queued_work_last->next = &wi;
660 }
661 cpu->queued_work_last = &wi;
662 wi.next = NULL;
663 wi.done = false;
664
665 qemu_cpu_kick(cpu);
666 while (!wi.done) {
667 CPUState *self_cpu = current_cpu;
668
669 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
670 current_cpu = self_cpu;
671 }
672 }
673
674 void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
675 {
676 struct qemu_work_item *wi;
677
678 if (qemu_cpu_is_self(cpu)) {
679 func(data);
680 return;
681 }
682
683 wi = g_malloc0(sizeof(struct qemu_work_item));
684 wi->func = func;
685 wi->data = data;
686 wi->free = true;
687 if (cpu->queued_work_first == NULL) {
688 cpu->queued_work_first = wi;
689 } else {
690 cpu->queued_work_last->next = wi;
691 }
692 cpu->queued_work_last = wi;
693 wi->next = NULL;
694 wi->done = false;
695
696 qemu_cpu_kick(cpu);
697 }
698
699 static void flush_queued_work(CPUState *cpu)
700 {
701 struct qemu_work_item *wi;
702
703 if (cpu->queued_work_first == NULL) {
704 return;
705 }
706
707 while ((wi = cpu->queued_work_first)) {
708 cpu->queued_work_first = wi->next;
709 wi->func(wi->data);
710 wi->done = true;
711 if (wi->free) {
712 g_free(wi);
713 }
714 }
715 cpu->queued_work_last = NULL;
716 qemu_cond_broadcast(&qemu_work_cond);
717 }
718
719 static void qemu_wait_io_event_common(CPUState *cpu)
720 {
721 if (cpu->stop) {
722 cpu->stop = false;
723 cpu->stopped = true;
724 qemu_cond_signal(&qemu_pause_cond);
725 }
726 flush_queued_work(cpu);
727 cpu->thread_kicked = false;
728 }
729
730 static void qemu_tcg_wait_io_event(void)
731 {
732 CPUState *cpu;
733
734 while (all_cpu_threads_idle()) {
735 /* Start accounting real time to the virtual clock if the CPUs
736 are idle. */
737 qemu_clock_warp(vm_clock);
738 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
739 }
740
741 while (iothread_requesting_mutex) {
742 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
743 }
744
745 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
746 qemu_wait_io_event_common(cpu);
747 }
748 }
749
750 static void qemu_kvm_wait_io_event(CPUState *cpu)
751 {
752 while (cpu_thread_is_idle(cpu)) {
753 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
754 }
755
756 qemu_kvm_eat_signals(cpu);
757 qemu_wait_io_event_common(cpu);
758 }
759
760 static void *qemu_kvm_cpu_thread_fn(void *arg)
761 {
762 CPUState *cpu = arg;
763 int r;
764
765 qemu_mutex_lock(&qemu_global_mutex);
766 qemu_thread_get_self(cpu->thread);
767 cpu->thread_id = qemu_get_thread_id();
768 current_cpu = cpu;
769
770 r = kvm_init_vcpu(cpu);
771 if (r < 0) {
772 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
773 exit(1);
774 }
775
776 qemu_kvm_init_cpu_signals(cpu);
777
778 /* signal CPU creation */
779 cpu->created = true;
780 qemu_cond_signal(&qemu_cpu_cond);
781
782 while (1) {
783 if (cpu_can_run(cpu)) {
784 r = kvm_cpu_exec(cpu);
785 if (r == EXCP_DEBUG) {
786 cpu_handle_guest_debug(cpu);
787 }
788 }
789 qemu_kvm_wait_io_event(cpu);
790 }
791
792 return NULL;
793 }
794
795 static void *qemu_dummy_cpu_thread_fn(void *arg)
796 {
797 #ifdef _WIN32
798 fprintf(stderr, "qtest is not supported under Windows\n");
799 exit(1);
800 #else
801 CPUState *cpu = arg;
802 sigset_t waitset;
803 int r;
804
805 qemu_mutex_lock_iothread();
806 qemu_thread_get_self(cpu->thread);
807 cpu->thread_id = qemu_get_thread_id();
808
809 sigemptyset(&waitset);
810 sigaddset(&waitset, SIG_IPI);
811
812 /* signal CPU creation */
813 cpu->created = true;
814 qemu_cond_signal(&qemu_cpu_cond);
815
816 current_cpu = cpu;
817 while (1) {
818 current_cpu = NULL;
819 qemu_mutex_unlock_iothread();
820 do {
821 int sig;
822 r = sigwait(&waitset, &sig);
823 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
824 if (r == -1) {
825 perror("sigwait");
826 exit(1);
827 }
828 qemu_mutex_lock_iothread();
829 current_cpu = cpu;
830 qemu_wait_io_event_common(cpu);
831 }
832
833 return NULL;
834 #endif
835 }
836
/* Forward declaration: defined further down in this file, but called by
 * qemu_tcg_cpu_thread_fn() before that point. */
static void tcg_exec_all(void);
838
839 static void tcg_signal_cpu_creation(CPUState *cpu, void *data)
840 {
841 cpu->thread_id = qemu_get_thread_id();
842 cpu->created = true;
843 }
844
845 static void *qemu_tcg_cpu_thread_fn(void *arg)
846 {
847 CPUState *cpu = arg;
848
849 qemu_tcg_init_cpu_signals();
850 qemu_thread_get_self(cpu->thread);
851
852 qemu_mutex_lock(&qemu_global_mutex);
853 qemu_for_each_cpu(tcg_signal_cpu_creation, NULL);
854 qemu_cond_signal(&qemu_cpu_cond);
855
856 /* wait for initial kick-off after machine start */
857 while (first_cpu->stopped) {
858 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
859
860 /* process any pending work */
861 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
862 qemu_wait_io_event_common(cpu);
863 }
864 }
865
866 while (1) {
867 tcg_exec_all();
868 if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
869 qemu_notify_event();
870 }
871 qemu_tcg_wait_io_event();
872 }
873
874 return NULL;
875 }
876
877 static void qemu_cpu_kick_thread(CPUState *cpu)
878 {
879 #ifndef _WIN32
880 int err;
881
882 err = pthread_kill(cpu->thread->thread, SIG_IPI);
883 if (err) {
884 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
885 exit(1);
886 }
887 #else /* _WIN32 */
888 if (!qemu_cpu_is_self(cpu)) {
889 CONTEXT tcgContext;
890
891 if (SuspendThread(cpu->hThread) == (DWORD)-1) {
892 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
893 GetLastError());
894 exit(1);
895 }
896
897 /* On multi-core systems, we are not sure that the thread is actually
898 * suspended until we can get the context.
899 */
900 tcgContext.ContextFlags = CONTEXT_CONTROL;
901 while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
902 continue;
903 }
904
905 cpu_signal(0);
906
907 if (ResumeThread(cpu->hThread) == (DWORD)-1) {
908 fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
909 GetLastError());
910 exit(1);
911 }
912 }
913 #endif
914 }
915
916 void qemu_cpu_kick(CPUState *cpu)
917 {
918 qemu_cond_broadcast(cpu->halt_cond);
919 if (!tcg_enabled() && !cpu->thread_kicked) {
920 qemu_cpu_kick_thread(cpu);
921 cpu->thread_kicked = true;
922 }
923 }
924
925 void qemu_cpu_kick_self(void)
926 {
927 #ifndef _WIN32
928 assert(current_cpu);
929
930 if (!current_cpu->thread_kicked) {
931 qemu_cpu_kick_thread(current_cpu);
932 current_cpu->thread_kicked = true;
933 }
934 #else
935 abort();
936 #endif
937 }
938
939 bool qemu_cpu_is_self(CPUState *cpu)
940 {
941 return qemu_thread_is_self(cpu->thread);
942 }
943
944 static bool qemu_in_vcpu_thread(void)
945 {
946 return current_cpu && qemu_cpu_is_self(current_cpu);
947 }
948
949 void qemu_mutex_lock_iothread(void)
950 {
951 if (!tcg_enabled()) {
952 qemu_mutex_lock(&qemu_global_mutex);
953 } else {
954 iothread_requesting_mutex = true;
955 if (qemu_mutex_trylock(&qemu_global_mutex)) {
956 qemu_cpu_kick_thread(first_cpu);
957 qemu_mutex_lock(&qemu_global_mutex);
958 }
959 iothread_requesting_mutex = false;
960 qemu_cond_broadcast(&qemu_io_proceeded_cond);
961 }
962 }
963
964 void qemu_mutex_unlock_iothread(void)
965 {
966 qemu_mutex_unlock(&qemu_global_mutex);
967 }
968
969 static int all_vcpus_paused(void)
970 {
971 CPUState *cpu = first_cpu;
972
973 while (cpu) {
974 if (!cpu->stopped) {
975 return 0;
976 }
977 cpu = cpu->next_cpu;
978 }
979
980 return 1;
981 }
982
983 void pause_all_vcpus(void)
984 {
985 CPUState *cpu = first_cpu;
986
987 qemu_clock_enable(vm_clock, false);
988 while (cpu) {
989 cpu->stop = true;
990 qemu_cpu_kick(cpu);
991 cpu = cpu->next_cpu;
992 }
993
994 if (qemu_in_vcpu_thread()) {
995 cpu_stop_current();
996 if (!kvm_enabled()) {
997 cpu = first_cpu;
998 while (cpu) {
999 cpu->stop = false;
1000 cpu->stopped = true;
1001 cpu = cpu->next_cpu;
1002 }
1003 return;
1004 }
1005 }
1006
1007 while (!all_vcpus_paused()) {
1008 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1009 cpu = first_cpu;
1010 while (cpu) {
1011 qemu_cpu_kick(cpu);
1012 cpu = cpu->next_cpu;
1013 }
1014 }
1015 }
1016
1017 void cpu_resume(CPUState *cpu)
1018 {
1019 cpu->stop = false;
1020 cpu->stopped = false;
1021 qemu_cpu_kick(cpu);
1022 }
1023
1024 void resume_all_vcpus(void)
1025 {
1026 CPUState *cpu = first_cpu;
1027
1028 qemu_clock_enable(vm_clock, true);
1029 while (cpu) {
1030 cpu_resume(cpu);
1031 cpu = cpu->next_cpu;
1032 }
1033 }
1034
1035 static void qemu_tcg_init_vcpu(CPUState *cpu)
1036 {
1037 /* share a single thread for all cpus with TCG */
1038 if (!tcg_cpu_thread) {
1039 cpu->thread = g_malloc0(sizeof(QemuThread));
1040 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1041 qemu_cond_init(cpu->halt_cond);
1042 tcg_halt_cond = cpu->halt_cond;
1043 qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
1044 QEMU_THREAD_JOINABLE);
1045 #ifdef _WIN32
1046 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1047 #endif
1048 while (!cpu->created) {
1049 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1050 }
1051 tcg_cpu_thread = cpu->thread;
1052 } else {
1053 cpu->thread = tcg_cpu_thread;
1054 cpu->halt_cond = tcg_halt_cond;
1055 }
1056 }
1057
1058 static void qemu_kvm_start_vcpu(CPUState *cpu)
1059 {
1060 cpu->thread = g_malloc0(sizeof(QemuThread));
1061 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1062 qemu_cond_init(cpu->halt_cond);
1063 qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
1064 QEMU_THREAD_JOINABLE);
1065 while (!cpu->created) {
1066 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1067 }
1068 }
1069
1070 static void qemu_dummy_start_vcpu(CPUState *cpu)
1071 {
1072 cpu->thread = g_malloc0(sizeof(QemuThread));
1073 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1074 qemu_cond_init(cpu->halt_cond);
1075 qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, cpu,
1076 QEMU_THREAD_JOINABLE);
1077 while (!cpu->created) {
1078 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1079 }
1080 }
1081
1082 void qemu_init_vcpu(CPUState *cpu)
1083 {
1084 cpu->nr_cores = smp_cores;
1085 cpu->nr_threads = smp_threads;
1086 cpu->stopped = true;
1087 if (kvm_enabled()) {
1088 qemu_kvm_start_vcpu(cpu);
1089 } else if (tcg_enabled()) {
1090 qemu_tcg_init_vcpu(cpu);
1091 } else {
1092 qemu_dummy_start_vcpu(cpu);
1093 }
1094 }
1095
1096 void cpu_stop_current(void)
1097 {
1098 if (current_cpu) {
1099 current_cpu->stop = false;
1100 current_cpu->stopped = true;
1101 cpu_exit(current_cpu);
1102 qemu_cond_signal(&qemu_pause_cond);
1103 }
1104 }
1105
1106 int vm_stop(RunState state)
1107 {
1108 if (qemu_in_vcpu_thread()) {
1109 qemu_system_vmstop_request(state);
1110 /*
1111 * FIXME: should not return to device code in case
1112 * vm_stop() has been requested.
1113 */
1114 cpu_stop_current();
1115 return 0;
1116 }
1117
1118 return do_vm_stop(state);
1119 }
1120
1121 /* does a state transition even if the VM is already stopped,
1122 current state is forgotten forever */
1123 int vm_stop_force_state(RunState state)
1124 {
1125 if (runstate_is_running()) {
1126 return vm_stop(state);
1127 } else {
1128 runstate_set(state);
1129 return 0;
1130 }
1131 }
1132
1133 static int tcg_cpu_exec(CPUArchState *env)
1134 {
1135 int ret;
1136 #ifdef CONFIG_PROFILER
1137 int64_t ti;
1138 #endif
1139
1140 #ifdef CONFIG_PROFILER
1141 ti = profile_getclock();
1142 #endif
1143 if (use_icount) {
1144 int64_t count;
1145 int decr;
1146 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
1147 env->icount_decr.u16.low = 0;
1148 env->icount_extra = 0;
1149 count = qemu_icount_round(qemu_clock_deadline(vm_clock));
1150 qemu_icount += count;
1151 decr = (count > 0xffff) ? 0xffff : count;
1152 count -= decr;
1153 env->icount_decr.u16.low = decr;
1154 env->icount_extra = count;
1155 }
1156 ret = cpu_exec(env);
1157 #ifdef CONFIG_PROFILER
1158 qemu_time += profile_getclock() - ti;
1159 #endif
1160 if (use_icount) {
1161 /* Fold pending instructions back into the
1162 instruction counter, and clear the interrupt flag. */
1163 qemu_icount -= (env->icount_decr.u16.low
1164 + env->icount_extra);
1165 env->icount_decr.u32 = 0;
1166 env->icount_extra = 0;
1167 }
1168 return ret;
1169 }
1170
1171 static void tcg_exec_all(void)
1172 {
1173 int r;
1174
1175 /* Account partial waits to the vm_clock. */
1176 qemu_clock_warp(vm_clock);
1177
1178 if (next_cpu == NULL) {
1179 next_cpu = first_cpu;
1180 }
1181 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1182 CPUState *cpu = next_cpu;
1183 CPUArchState *env = cpu->env_ptr;
1184
1185 qemu_clock_enable(vm_clock,
1186 (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1187
1188 if (cpu_can_run(cpu)) {
1189 r = tcg_cpu_exec(env);
1190 if (r == EXCP_DEBUG) {
1191 cpu_handle_guest_debug(cpu);
1192 break;
1193 }
1194 } else if (cpu->stop || cpu->stopped) {
1195 break;
1196 }
1197 }
1198 exit_request = 0;
1199 }
1200
1201 void set_numa_modes(void)
1202 {
1203 CPUState *cpu;
1204 int i;
1205
1206 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
1207 for (i = 0; i < nb_numa_nodes; i++) {
1208 if (test_bit(cpu->cpu_index, node_cpumask[i])) {
1209 cpu->numa_node = i;
1210 }
1211 }
1212 }
1213 }
1214
1215 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1216 {
1217 /* XXX: implement xxx_cpu_list for targets that still miss it */
1218 #if defined(cpu_list)
1219 cpu_list(f, cpu_fprintf);
1220 #endif
1221 }
1222
1223 CpuInfoList *qmp_query_cpus(Error **errp)
1224 {
1225 CpuInfoList *head = NULL, *cur_item = NULL;
1226 CPUState *cpu;
1227
1228 for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
1229 CpuInfoList *info;
1230 #if defined(TARGET_I386)
1231 X86CPU *x86_cpu = X86_CPU(cpu);
1232 CPUX86State *env = &x86_cpu->env;
1233 #elif defined(TARGET_PPC)
1234 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1235 CPUPPCState *env = &ppc_cpu->env;
1236 #elif defined(TARGET_SPARC)
1237 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1238 CPUSPARCState *env = &sparc_cpu->env;
1239 #elif defined(TARGET_MIPS)
1240 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1241 CPUMIPSState *env = &mips_cpu->env;
1242 #endif
1243
1244 cpu_synchronize_state(cpu);
1245
1246 info = g_malloc0(sizeof(*info));
1247 info->value = g_malloc0(sizeof(*info->value));
1248 info->value->CPU = cpu->cpu_index;
1249 info->value->current = (cpu == first_cpu);
1250 info->value->halted = cpu->halted;
1251 info->value->thread_id = cpu->thread_id;
1252 #if defined(TARGET_I386)
1253 info->value->has_pc = true;
1254 info->value->pc = env->eip + env->segs[R_CS].base;
1255 #elif defined(TARGET_PPC)
1256 info->value->has_nip = true;
1257 info->value->nip = env->nip;
1258 #elif defined(TARGET_SPARC)
1259 info->value->has_pc = true;
1260 info->value->pc = env->pc;
1261 info->value->has_npc = true;
1262 info->value->npc = env->npc;
1263 #elif defined(TARGET_MIPS)
1264 info->value->has_PC = true;
1265 info->value->PC = env->active_tc.PC;
1266 #endif
1267
1268 /* XXX: waiting for the qapi to support GSList */
1269 if (!cur_item) {
1270 head = cur_item = info;
1271 } else {
1272 cur_item->next = info;
1273 cur_item = info;
1274 }
1275 }
1276
1277 return head;
1278 }
1279
1280 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1281 bool has_cpu, int64_t cpu_index, Error **errp)
1282 {
1283 FILE *f;
1284 uint32_t l;
1285 CPUArchState *env;
1286 CPUState *cpu;
1287 uint8_t buf[1024];
1288
1289 if (!has_cpu) {
1290 cpu_index = 0;
1291 }
1292
1293 cpu = qemu_get_cpu(cpu_index);
1294 if (cpu == NULL) {
1295 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1296 "a CPU number");
1297 return;
1298 }
1299 env = cpu->env_ptr;
1300
1301 f = fopen(filename, "wb");
1302 if (!f) {
1303 error_setg_file_open(errp, errno, filename);
1304 return;
1305 }
1306
1307 while (size != 0) {
1308 l = sizeof(buf);
1309 if (l > size)
1310 l = size;
1311 cpu_memory_rw_debug(env, addr, buf, l, 0);
1312 if (fwrite(buf, 1, l, f) != l) {
1313 error_set(errp, QERR_IO_ERROR);
1314 goto exit;
1315 }
1316 addr += l;
1317 size -= l;
1318 }
1319
1320 exit:
1321 fclose(f);
1322 }
1323
1324 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1325 Error **errp)
1326 {
1327 FILE *f;
1328 uint32_t l;
1329 uint8_t buf[1024];
1330
1331 f = fopen(filename, "wb");
1332 if (!f) {
1333 error_setg_file_open(errp, errno, filename);
1334 return;
1335 }
1336
1337 while (size != 0) {
1338 l = sizeof(buf);
1339 if (l > size)
1340 l = size;
1341 cpu_physical_memory_rw(addr, buf, l, 0);
1342 if (fwrite(buf, 1, l, f) != l) {
1343 error_set(errp, QERR_IO_ERROR);
1344 goto exit;
1345 }
1346 addr += l;
1347 size -= l;
1348 }
1349
1350 exit:
1351 fclose(f);
1352 }
1353
1354 void qmp_inject_nmi(Error **errp)
1355 {
1356 #if defined(TARGET_I386)
1357 CPUState *cs;
1358
1359 for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) {
1360 X86CPU *cpu = X86_CPU(cs);
1361 CPUX86State *env = &cpu->env;
1362
1363 if (!env->apic_state) {
1364 cpu_interrupt(cs, CPU_INTERRUPT_NMI);
1365 } else {
1366 apic_deliver_nmi(env->apic_state);
1367 }
1368 }
1369 #else
1370 error_set(errp, QERR_UNSUPPORTED);
1371 #endif
1372 }