git.proxmox.com Git - qemu.git/blob - cpus.c
Documentation: Move balloon option out of i386 only section
[qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
27
28 #include "monitor.h"
29 #include "sysemu.h"
30 #include "gdbstub.h"
31 #include "dma.h"
32 #include "kvm.h"
33 #include "qmp-commands.h"
34
35 #include "qemu-thread.h"
36 #include "cpus.h"
37 #include "main-loop.h"
38
39 #ifndef _WIN32
40 #include "compatfd.h"
41 #endif
42
43 #ifdef CONFIG_LINUX
44
45 #include <sys/prctl.h>
46
47 #ifndef PR_MCE_KILL
48 #define PR_MCE_KILL 33
49 #endif
50
51 #ifndef PR_MCE_KILL_SET
52 #define PR_MCE_KILL_SET 1
53 #endif
54
55 #ifndef PR_MCE_KILL_EARLY
56 #define PR_MCE_KILL_EARLY 1
57 #endif
58
59 #endif /* CONFIG_LINUX */
60
61 static CPUState *next_cpu;
62
63 /***********************************************************/
64 /* guest cycle counter */
65
66 /* Conversion factor from emulated instructions to virtual clock ticks. */
67 static int icount_time_shift;
68 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
69 #define MAX_ICOUNT_SHIFT 10
70 /* Compensate for varying guest execution speed. */
71 static int64_t qemu_icount_bias;
72 static QEMUTimer *icount_rt_timer;
73 static QEMUTimer *icount_vm_timer;
74 static QEMUTimer *icount_warp_timer;
75 static int64_t vm_clock_warp_start;
76 static int64_t qemu_icount;
77
/* Bookkeeping for the host tick counter and host clock as seen by the guest. */
struct TimersState {
    /* Last raw value returned by cpu_get_real_ticks(); used by
       cpu_get_ticks() to detect a counter that went backwards. */
    int64_t cpu_ticks_prev;
    /* Offset added to the raw tick counter; holds the frozen tick value
       while ticks are disabled. */
    int64_t cpu_ticks_offset;
    /* Offset added to get_clock(); holds the frozen clock value while
       ticks are disabled. */
    int64_t cpu_clock_offset;
    /* Non-zero while the counters are running. */
    int32_t cpu_ticks_enabled;
    /* Not used by the code in this file other than being listed in
       vmstate_timers. */
    int64_t dummy;
};

typedef struct TimersState TimersState;

TimersState timers_state;
87
88 /* Return the virtual CPU time, based on the instruction counter. */
89 int64_t cpu_get_icount(void)
90 {
91 int64_t icount;
92 CPUState *env = cpu_single_env;
93
94 icount = qemu_icount;
95 if (env) {
96 if (!can_do_io(env)) {
97 fprintf(stderr, "Bad clock read\n");
98 }
99 icount -= (env->icount_decr.u16.low + env->icount_extra);
100 }
101 return qemu_icount_bias + (icount << icount_time_shift);
102 }
103
104 /* return the host CPU cycle counter and handle stop/restart */
105 int64_t cpu_get_ticks(void)
106 {
107 if (use_icount) {
108 return cpu_get_icount();
109 }
110 if (!timers_state.cpu_ticks_enabled) {
111 return timers_state.cpu_ticks_offset;
112 } else {
113 int64_t ticks;
114 ticks = cpu_get_real_ticks();
115 if (timers_state.cpu_ticks_prev > ticks) {
116 /* Note: non increasing ticks may happen if the host uses
117 software suspend */
118 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
119 }
120 timers_state.cpu_ticks_prev = ticks;
121 return ticks + timers_state.cpu_ticks_offset;
122 }
123 }
124
125 /* return the host CPU monotonic timer and handle stop/restart */
126 int64_t cpu_get_clock(void)
127 {
128 int64_t ti;
129 if (!timers_state.cpu_ticks_enabled) {
130 return timers_state.cpu_clock_offset;
131 } else {
132 ti = get_clock();
133 return ti + timers_state.cpu_clock_offset;
134 }
135 }
136
137 /* enable cpu_get_ticks() */
138 void cpu_enable_ticks(void)
139 {
140 if (!timers_state.cpu_ticks_enabled) {
141 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
142 timers_state.cpu_clock_offset -= get_clock();
143 timers_state.cpu_ticks_enabled = 1;
144 }
145 }
146
147 /* disable cpu_get_ticks() : the clock is stopped. You must not call
148 cpu_get_ticks() after that. */
149 void cpu_disable_ticks(void)
150 {
151 if (timers_state.cpu_ticks_enabled) {
152 timers_state.cpu_ticks_offset = cpu_get_ticks();
153 timers_state.cpu_clock_offset = cpu_get_clock();
154 timers_state.cpu_ticks_enabled = 0;
155 }
156 }
157
158 /* Correlation between real and virtual time is always going to be
159 fairly approximate, so ignore small variation.
160 When the guest is idle real and virtual time will be aligned in
161 the IO wait loop. */
162 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
163
164 static void icount_adjust(void)
165 {
166 int64_t cur_time;
167 int64_t cur_icount;
168 int64_t delta;
169 static int64_t last_delta;
170 /* If the VM is not running, then do nothing. */
171 if (!runstate_is_running()) {
172 return;
173 }
174 cur_time = cpu_get_clock();
175 cur_icount = qemu_get_clock_ns(vm_clock);
176 delta = cur_icount - cur_time;
177 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
178 if (delta > 0
179 && last_delta + ICOUNT_WOBBLE < delta * 2
180 && icount_time_shift > 0) {
181 /* The guest is getting too far ahead. Slow time down. */
182 icount_time_shift--;
183 }
184 if (delta < 0
185 && last_delta - ICOUNT_WOBBLE > delta * 2
186 && icount_time_shift < MAX_ICOUNT_SHIFT) {
187 /* The guest is getting too far behind. Speed time up. */
188 icount_time_shift++;
189 }
190 last_delta = delta;
191 qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
192 }
193
194 static void icount_adjust_rt(void *opaque)
195 {
196 qemu_mod_timer(icount_rt_timer,
197 qemu_get_clock_ms(rt_clock) + 1000);
198 icount_adjust();
199 }
200
201 static void icount_adjust_vm(void *opaque)
202 {
203 qemu_mod_timer(icount_vm_timer,
204 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
205 icount_adjust();
206 }
207
208 static int64_t qemu_icount_round(int64_t count)
209 {
210 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
211 }
212
213 static void icount_warp_rt(void *opaque)
214 {
215 if (vm_clock_warp_start == -1) {
216 return;
217 }
218
219 if (runstate_is_running()) {
220 int64_t clock = qemu_get_clock_ns(rt_clock);
221 int64_t warp_delta = clock - vm_clock_warp_start;
222 if (use_icount == 1) {
223 qemu_icount_bias += warp_delta;
224 } else {
225 /*
226 * In adaptive mode, do not let the vm_clock run too
227 * far ahead of real time.
228 */
229 int64_t cur_time = cpu_get_clock();
230 int64_t cur_icount = qemu_get_clock_ns(vm_clock);
231 int64_t delta = cur_time - cur_icount;
232 qemu_icount_bias += MIN(warp_delta, delta);
233 }
234 if (qemu_clock_expired(vm_clock)) {
235 qemu_notify_event();
236 }
237 }
238 vm_clock_warp_start = -1;
239 }
240
241 void qemu_clock_warp(QEMUClock *clock)
242 {
243 int64_t deadline;
244
245 /*
246 * There are too many global variables to make the "warp" behavior
247 * applicable to other clocks. But a clock argument removes the
248 * need for if statements all over the place.
249 */
250 if (clock != vm_clock || !use_icount) {
251 return;
252 }
253
254 /*
255 * If the CPUs have been sleeping, advance the vm_clock timer now. This
256 * ensures that the deadline for the timer is computed correctly below.
257 * This also makes sure that the insn counter is synchronized before the
258 * CPU starts running, in case the CPU is woken by an event other than
259 * the earliest vm_clock timer.
260 */
261 icount_warp_rt(NULL);
262 if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
263 qemu_del_timer(icount_warp_timer);
264 return;
265 }
266
267 vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
268 deadline = qemu_clock_deadline(vm_clock);
269 if (deadline > 0) {
270 /*
271 * Ensure the vm_clock proceeds even when the virtual CPU goes to
272 * sleep. Otherwise, the CPU might be waiting for a future timer
273 * interrupt to wake it up, but the interrupt never comes because
274 * the vCPU isn't running any insns and thus doesn't advance the
275 * vm_clock.
276 *
277 * An extreme solution for this problem would be to never let VCPUs
278 * sleep in icount mode if there is a pending vm_clock timer; rather
279 * time could just advance to the next vm_clock event. Instead, we
280 * do stop VCPUs and only advance vm_clock after some "real" time,
281 * (related to the time left until the next event) has passed. This
282 * rt_clock timer will do this. This avoids that the warps are too
283 * visible externally---for example, you will not be sending network
284 * packets continuously instead of every 100ms.
285 */
286 qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
287 } else {
288 qemu_notify_event();
289 }
290 }
291
292 static const VMStateDescription vmstate_timers = {
293 .name = "timer",
294 .version_id = 2,
295 .minimum_version_id = 1,
296 .minimum_version_id_old = 1,
297 .fields = (VMStateField[]) {
298 VMSTATE_INT64(cpu_ticks_offset, TimersState),
299 VMSTATE_INT64(dummy, TimersState),
300 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
301 VMSTATE_END_OF_LIST()
302 }
303 };
304
305 void configure_icount(const char *option)
306 {
307 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
308 if (!option) {
309 return;
310 }
311
312 icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
313 if (strcmp(option, "auto") != 0) {
314 icount_time_shift = strtol(option, NULL, 0);
315 use_icount = 1;
316 return;
317 }
318
319 use_icount = 2;
320
321 /* 125MIPS seems a reasonable initial guess at the guest speed.
322 It will be corrected fairly quickly anyway. */
323 icount_time_shift = 3;
324
325 /* Have both realtime and virtual time triggers for speed adjustment.
326 The realtime trigger catches emulated time passing too slowly,
327 the virtual time trigger catches emulated time passing too fast.
328 Realtime triggers occur even when idle, so use them less frequently
329 than VM triggers. */
330 icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
331 qemu_mod_timer(icount_rt_timer,
332 qemu_get_clock_ms(rt_clock) + 1000);
333 icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
334 qemu_mod_timer(icount_vm_timer,
335 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
336 }
337
338 /***********************************************************/
339 void hw_error(const char *fmt, ...)
340 {
341 va_list ap;
342 CPUState *env;
343
344 va_start(ap, fmt);
345 fprintf(stderr, "qemu: hardware error: ");
346 vfprintf(stderr, fmt, ap);
347 fprintf(stderr, "\n");
348 for(env = first_cpu; env != NULL; env = env->next_cpu) {
349 fprintf(stderr, "CPU #%d:\n", env->cpu_index);
350 #ifdef TARGET_I386
351 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
352 #else
353 cpu_dump_state(env, stderr, fprintf, 0);
354 #endif
355 }
356 va_end(ap);
357 abort();
358 }
359
360 void cpu_synchronize_all_states(void)
361 {
362 CPUState *cpu;
363
364 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
365 cpu_synchronize_state(cpu);
366 }
367 }
368
369 void cpu_synchronize_all_post_reset(void)
370 {
371 CPUState *cpu;
372
373 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
374 cpu_synchronize_post_reset(cpu);
375 }
376 }
377
378 void cpu_synchronize_all_post_init(void)
379 {
380 CPUState *cpu;
381
382 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
383 cpu_synchronize_post_init(cpu);
384 }
385 }
386
387 int cpu_is_stopped(CPUState *env)
388 {
389 return !runstate_is_running() || env->stopped;
390 }
391
392 static void do_vm_stop(RunState state)
393 {
394 if (runstate_is_running()) {
395 cpu_disable_ticks();
396 pause_all_vcpus();
397 runstate_set(state);
398 vm_state_notify(0, state);
399 bdrv_drain_all();
400 bdrv_flush_all();
401 monitor_protocol_event(QEVENT_STOP, NULL);
402 }
403 }
404
405 static int cpu_can_run(CPUState *env)
406 {
407 if (env->stop) {
408 return 0;
409 }
410 if (env->stopped || !runstate_is_running()) {
411 return 0;
412 }
413 return 1;
414 }
415
416 static bool cpu_thread_is_idle(CPUState *env)
417 {
418 if (env->stop || env->queued_work_first) {
419 return false;
420 }
421 if (env->stopped || !runstate_is_running()) {
422 return true;
423 }
424 if (!env->halted || qemu_cpu_has_work(env) ||
425 (kvm_enabled() && kvm_irqchip_in_kernel())) {
426 return false;
427 }
428 return true;
429 }
430
431 bool all_cpu_threads_idle(void)
432 {
433 CPUState *env;
434
435 for (env = first_cpu; env != NULL; env = env->next_cpu) {
436 if (!cpu_thread_is_idle(env)) {
437 return false;
438 }
439 }
440 return true;
441 }
442
443 static void cpu_handle_guest_debug(CPUState *env)
444 {
445 gdb_set_stop_cpu(env);
446 qemu_system_debug_request();
447 env->stopped = 1;
448 }
449
450 static void cpu_signal(int sig)
451 {
452 if (cpu_single_env) {
453 cpu_exit(cpu_single_env);
454 }
455 exit_request = 1;
456 }
457
458 #ifdef CONFIG_LINUX
459 static void sigbus_reraise(void)
460 {
461 sigset_t set;
462 struct sigaction action;
463
464 memset(&action, 0, sizeof(action));
465 action.sa_handler = SIG_DFL;
466 if (!sigaction(SIGBUS, &action, NULL)) {
467 raise(SIGBUS);
468 sigemptyset(&set);
469 sigaddset(&set, SIGBUS);
470 sigprocmask(SIG_UNBLOCK, &set, NULL);
471 }
472 perror("Failed to re-raise SIGBUS!\n");
473 abort();
474 }
475
476 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
477 void *ctx)
478 {
479 if (kvm_on_sigbus(siginfo->ssi_code,
480 (void *)(intptr_t)siginfo->ssi_addr)) {
481 sigbus_reraise();
482 }
483 }
484
485 static void qemu_init_sigbus(void)
486 {
487 struct sigaction action;
488
489 memset(&action, 0, sizeof(action));
490 action.sa_flags = SA_SIGINFO;
491 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
492 sigaction(SIGBUS, &action, NULL);
493
494 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
495 }
496
497 static void qemu_kvm_eat_signals(CPUState *env)
498 {
499 struct timespec ts = { 0, 0 };
500 siginfo_t siginfo;
501 sigset_t waitset;
502 sigset_t chkset;
503 int r;
504
505 sigemptyset(&waitset);
506 sigaddset(&waitset, SIG_IPI);
507 sigaddset(&waitset, SIGBUS);
508
509 do {
510 r = sigtimedwait(&waitset, &siginfo, &ts);
511 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
512 perror("sigtimedwait");
513 exit(1);
514 }
515
516 switch (r) {
517 case SIGBUS:
518 if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
519 sigbus_reraise();
520 }
521 break;
522 default:
523 break;
524 }
525
526 r = sigpending(&chkset);
527 if (r == -1) {
528 perror("sigpending");
529 exit(1);
530 }
531 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
532 }
533
534 #else /* !CONFIG_LINUX */
535
/* Build without CONFIG_LINUX: no SIGBUS handling is set up. */
static void qemu_init_sigbus(void)
{
    /* nothing to do */
}
539
540 static void qemu_kvm_eat_signals(CPUState *env)
541 {
542 }
543 #endif /* !CONFIG_LINUX */
544
545 #ifndef _WIN32
/* Empty signal handler; installed for SIG_IPI by
   qemu_kvm_init_cpu_signals().  @sig is ignored. */
static void dummy_signal(int sig)
{
    (void)sig;
}
549
550 static void qemu_kvm_init_cpu_signals(CPUState *env)
551 {
552 int r;
553 sigset_t set;
554 struct sigaction sigact;
555
556 memset(&sigact, 0, sizeof(sigact));
557 sigact.sa_handler = dummy_signal;
558 sigaction(SIG_IPI, &sigact, NULL);
559
560 pthread_sigmask(SIG_BLOCK, NULL, &set);
561 sigdelset(&set, SIG_IPI);
562 sigdelset(&set, SIGBUS);
563 r = kvm_set_signal_mask(env, &set);
564 if (r) {
565 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
566 exit(1);
567 }
568
569 sigdelset(&set, SIG_IPI);
570 sigdelset(&set, SIGBUS);
571 r = kvm_set_signal_mask(env, &set);
572 if (r) {
573 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
574 exit(1);
575 }
576 }
577
578 static void qemu_tcg_init_cpu_signals(void)
579 {
580 sigset_t set;
581 struct sigaction sigact;
582
583 memset(&sigact, 0, sizeof(sigact));
584 sigact.sa_handler = cpu_signal;
585 sigaction(SIG_IPI, &sigact, NULL);
586
587 sigemptyset(&set);
588 sigaddset(&set, SIG_IPI);
589 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
590 }
591
592 #else /* _WIN32 */
593 static void qemu_kvm_init_cpu_signals(CPUState *env)
594 {
595 abort();
596 }
597
/* Windows build: no signal setup is performed for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* nothing to do */
}
601 #endif /* _WIN32 */
602
603 QemuMutex qemu_global_mutex;
604 static QemuCond qemu_io_proceeded_cond;
605 static bool iothread_requesting_mutex;
606
607 static QemuThread io_thread;
608
609 static QemuThread *tcg_cpu_thread;
610 static QemuCond *tcg_halt_cond;
611
612 /* cpu creation */
613 static QemuCond qemu_cpu_cond;
614 /* system init */
615 static QemuCond qemu_pause_cond;
616 static QemuCond qemu_work_cond;
617
618 void qemu_init_cpu_loop(void)
619 {
620 qemu_init_sigbus();
621 qemu_cond_init(&qemu_cpu_cond);
622 qemu_cond_init(&qemu_pause_cond);
623 qemu_cond_init(&qemu_work_cond);
624 qemu_cond_init(&qemu_io_proceeded_cond);
625 qemu_mutex_init(&qemu_global_mutex);
626
627 qemu_thread_get_self(&io_thread);
628 }
629
630 void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
631 {
632 struct qemu_work_item wi;
633
634 if (qemu_cpu_is_self(env)) {
635 func(data);
636 return;
637 }
638
639 wi.func = func;
640 wi.data = data;
641 if (!env->queued_work_first) {
642 env->queued_work_first = &wi;
643 } else {
644 env->queued_work_last->next = &wi;
645 }
646 env->queued_work_last = &wi;
647 wi.next = NULL;
648 wi.done = false;
649
650 qemu_cpu_kick(env);
651 while (!wi.done) {
652 CPUState *self_env = cpu_single_env;
653
654 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
655 cpu_single_env = self_env;
656 }
657 }
658
659 static void flush_queued_work(CPUState *env)
660 {
661 struct qemu_work_item *wi;
662
663 if (!env->queued_work_first) {
664 return;
665 }
666
667 while ((wi = env->queued_work_first)) {
668 env->queued_work_first = wi->next;
669 wi->func(wi->data);
670 wi->done = true;
671 }
672 env->queued_work_last = NULL;
673 qemu_cond_broadcast(&qemu_work_cond);
674 }
675
676 static void qemu_wait_io_event_common(CPUState *env)
677 {
678 if (env->stop) {
679 env->stop = 0;
680 env->stopped = 1;
681 qemu_cond_signal(&qemu_pause_cond);
682 }
683 flush_queued_work(env);
684 env->thread_kicked = false;
685 }
686
687 static void qemu_tcg_wait_io_event(void)
688 {
689 CPUState *env;
690
691 while (all_cpu_threads_idle()) {
692 /* Start accounting real time to the virtual clock if the CPUs
693 are idle. */
694 qemu_clock_warp(vm_clock);
695 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
696 }
697
698 while (iothread_requesting_mutex) {
699 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
700 }
701
702 for (env = first_cpu; env != NULL; env = env->next_cpu) {
703 qemu_wait_io_event_common(env);
704 }
705 }
706
707 static void qemu_kvm_wait_io_event(CPUState *env)
708 {
709 while (cpu_thread_is_idle(env)) {
710 qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
711 }
712
713 qemu_kvm_eat_signals(env);
714 qemu_wait_io_event_common(env);
715 }
716
717 static void *qemu_kvm_cpu_thread_fn(void *arg)
718 {
719 CPUState *env = arg;
720 int r;
721
722 qemu_mutex_lock(&qemu_global_mutex);
723 qemu_thread_get_self(env->thread);
724 env->thread_id = qemu_get_thread_id();
725
726 r = kvm_init_vcpu(env);
727 if (r < 0) {
728 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
729 exit(1);
730 }
731
732 qemu_kvm_init_cpu_signals(env);
733
734 /* signal CPU creation */
735 env->created = 1;
736 qemu_cond_signal(&qemu_cpu_cond);
737
738 while (1) {
739 if (cpu_can_run(env)) {
740 r = kvm_cpu_exec(env);
741 if (r == EXCP_DEBUG) {
742 cpu_handle_guest_debug(env);
743 }
744 }
745 qemu_kvm_wait_io_event(env);
746 }
747
748 return NULL;
749 }
750
751 static void tcg_exec_all(void);
752
753 static void *qemu_tcg_cpu_thread_fn(void *arg)
754 {
755 CPUState *env = arg;
756
757 qemu_tcg_init_cpu_signals();
758 qemu_thread_get_self(env->thread);
759
760 /* signal CPU creation */
761 qemu_mutex_lock(&qemu_global_mutex);
762 for (env = first_cpu; env != NULL; env = env->next_cpu) {
763 env->thread_id = qemu_get_thread_id();
764 env->created = 1;
765 }
766 qemu_cond_signal(&qemu_cpu_cond);
767
768 /* wait for initial kick-off after machine start */
769 while (first_cpu->stopped) {
770 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
771 }
772
773 while (1) {
774 tcg_exec_all();
775 if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
776 qemu_notify_event();
777 }
778 qemu_tcg_wait_io_event();
779 }
780
781 return NULL;
782 }
783
784 static void qemu_cpu_kick_thread(CPUState *env)
785 {
786 #ifndef _WIN32
787 int err;
788
789 err = pthread_kill(env->thread->thread, SIG_IPI);
790 if (err) {
791 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
792 exit(1);
793 }
794 #else /* _WIN32 */
795 if (!qemu_cpu_is_self(env)) {
796 SuspendThread(env->hThread);
797 cpu_signal(0);
798 ResumeThread(env->hThread);
799 }
800 #endif
801 }
802
803 void qemu_cpu_kick(void *_env)
804 {
805 CPUState *env = _env;
806
807 qemu_cond_broadcast(env->halt_cond);
808 if (kvm_enabled() && !env->thread_kicked) {
809 qemu_cpu_kick_thread(env);
810 env->thread_kicked = true;
811 }
812 }
813
814 void qemu_cpu_kick_self(void)
815 {
816 #ifndef _WIN32
817 assert(cpu_single_env);
818
819 if (!cpu_single_env->thread_kicked) {
820 qemu_cpu_kick_thread(cpu_single_env);
821 cpu_single_env->thread_kicked = true;
822 }
823 #else
824 abort();
825 #endif
826 }
827
828 int qemu_cpu_is_self(void *_env)
829 {
830 CPUState *env = _env;
831
832 return qemu_thread_is_self(env->thread);
833 }
834
835 void qemu_mutex_lock_iothread(void)
836 {
837 if (kvm_enabled()) {
838 qemu_mutex_lock(&qemu_global_mutex);
839 } else {
840 iothread_requesting_mutex = true;
841 if (qemu_mutex_trylock(&qemu_global_mutex)) {
842 qemu_cpu_kick_thread(first_cpu);
843 qemu_mutex_lock(&qemu_global_mutex);
844 }
845 iothread_requesting_mutex = false;
846 qemu_cond_broadcast(&qemu_io_proceeded_cond);
847 }
848 }
849
850 void qemu_mutex_unlock_iothread(void)
851 {
852 qemu_mutex_unlock(&qemu_global_mutex);
853 }
854
855 static int all_vcpus_paused(void)
856 {
857 CPUState *penv = first_cpu;
858
859 while (penv) {
860 if (!penv->stopped) {
861 return 0;
862 }
863 penv = (CPUState *)penv->next_cpu;
864 }
865
866 return 1;
867 }
868
869 void pause_all_vcpus(void)
870 {
871 CPUState *penv = first_cpu;
872
873 qemu_clock_enable(vm_clock, false);
874 while (penv) {
875 penv->stop = 1;
876 qemu_cpu_kick(penv);
877 penv = (CPUState *)penv->next_cpu;
878 }
879
880 while (!all_vcpus_paused()) {
881 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
882 penv = first_cpu;
883 while (penv) {
884 qemu_cpu_kick(penv);
885 penv = (CPUState *)penv->next_cpu;
886 }
887 }
888 }
889
890 void resume_all_vcpus(void)
891 {
892 CPUState *penv = first_cpu;
893
894 qemu_clock_enable(vm_clock, true);
895 while (penv) {
896 penv->stop = 0;
897 penv->stopped = 0;
898 qemu_cpu_kick(penv);
899 penv = (CPUState *)penv->next_cpu;
900 }
901 }
902
903 static void qemu_tcg_init_vcpu(void *_env)
904 {
905 CPUState *env = _env;
906
907 /* share a single thread for all cpus with TCG */
908 if (!tcg_cpu_thread) {
909 env->thread = g_malloc0(sizeof(QemuThread));
910 env->halt_cond = g_malloc0(sizeof(QemuCond));
911 qemu_cond_init(env->halt_cond);
912 tcg_halt_cond = env->halt_cond;
913 qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env,
914 QEMU_THREAD_JOINABLE);
915 #ifdef _WIN32
916 env->hThread = qemu_thread_get_handle(env->thread);
917 #endif
918 while (env->created == 0) {
919 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
920 }
921 tcg_cpu_thread = env->thread;
922 } else {
923 env->thread = tcg_cpu_thread;
924 env->halt_cond = tcg_halt_cond;
925 }
926 }
927
928 static void qemu_kvm_start_vcpu(CPUState *env)
929 {
930 env->thread = g_malloc0(sizeof(QemuThread));
931 env->halt_cond = g_malloc0(sizeof(QemuCond));
932 qemu_cond_init(env->halt_cond);
933 qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env,
934 QEMU_THREAD_JOINABLE);
935 while (env->created == 0) {
936 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
937 }
938 }
939
940 void qemu_init_vcpu(void *_env)
941 {
942 CPUState *env = _env;
943
944 env->nr_cores = smp_cores;
945 env->nr_threads = smp_threads;
946 env->stopped = 1;
947 if (kvm_enabled()) {
948 qemu_kvm_start_vcpu(env);
949 } else {
950 qemu_tcg_init_vcpu(env);
951 }
952 }
953
954 void cpu_stop_current(void)
955 {
956 if (cpu_single_env) {
957 cpu_single_env->stop = 0;
958 cpu_single_env->stopped = 1;
959 cpu_exit(cpu_single_env);
960 qemu_cond_signal(&qemu_pause_cond);
961 }
962 }
963
964 void vm_stop(RunState state)
965 {
966 if (!qemu_thread_is_self(&io_thread)) {
967 qemu_system_vmstop_request(state);
968 /*
969 * FIXME: should not return to device code in case
970 * vm_stop() has been requested.
971 */
972 cpu_stop_current();
973 return;
974 }
975 do_vm_stop(state);
976 }
977
978 /* does a state transition even if the VM is already stopped,
979 current state is forgotten forever */
980 void vm_stop_force_state(RunState state)
981 {
982 if (runstate_is_running()) {
983 vm_stop(state);
984 } else {
985 runstate_set(state);
986 }
987 }
988
989 static int tcg_cpu_exec(CPUState *env)
990 {
991 int ret;
992 #ifdef CONFIG_PROFILER
993 int64_t ti;
994 #endif
995
996 #ifdef CONFIG_PROFILER
997 ti = profile_getclock();
998 #endif
999 if (use_icount) {
1000 int64_t count;
1001 int decr;
1002 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
1003 env->icount_decr.u16.low = 0;
1004 env->icount_extra = 0;
1005 count = qemu_icount_round(qemu_clock_deadline(vm_clock));
1006 qemu_icount += count;
1007 decr = (count > 0xffff) ? 0xffff : count;
1008 count -= decr;
1009 env->icount_decr.u16.low = decr;
1010 env->icount_extra = count;
1011 }
1012 ret = cpu_exec(env);
1013 #ifdef CONFIG_PROFILER
1014 qemu_time += profile_getclock() - ti;
1015 #endif
1016 if (use_icount) {
1017 /* Fold pending instructions back into the
1018 instruction counter, and clear the interrupt flag. */
1019 qemu_icount -= (env->icount_decr.u16.low
1020 + env->icount_extra);
1021 env->icount_decr.u32 = 0;
1022 env->icount_extra = 0;
1023 }
1024 return ret;
1025 }
1026
1027 static void tcg_exec_all(void)
1028 {
1029 int r;
1030
1031 /* Account partial waits to the vm_clock. */
1032 qemu_clock_warp(vm_clock);
1033
1034 if (next_cpu == NULL) {
1035 next_cpu = first_cpu;
1036 }
1037 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1038 CPUState *env = next_cpu;
1039
1040 qemu_clock_enable(vm_clock,
1041 (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1042
1043 if (cpu_can_run(env)) {
1044 r = tcg_cpu_exec(env);
1045 if (r == EXCP_DEBUG) {
1046 cpu_handle_guest_debug(env);
1047 break;
1048 }
1049 } else if (env->stop || env->stopped) {
1050 break;
1051 }
1052 }
1053 exit_request = 0;
1054 }
1055
1056 void set_numa_modes(void)
1057 {
1058 CPUState *env;
1059 int i;
1060
1061 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1062 for (i = 0; i < nb_numa_nodes; i++) {
1063 if (node_cpumask[i] & (1 << env->cpu_index)) {
1064 env->numa_node = i;
1065 }
1066 }
1067 }
1068 }
1069
1070 void set_cpu_log(const char *optarg)
1071 {
1072 int mask;
1073 const CPULogItem *item;
1074
1075 mask = cpu_str_to_log_mask(optarg);
1076 if (!mask) {
1077 printf("Log items (comma separated):\n");
1078 for (item = cpu_log_items; item->mask != 0; item++) {
1079 printf("%-10s %s\n", item->name, item->help);
1080 }
1081 exit(1);
1082 }
1083 cpu_set_log(mask);
1084 }
1085
/* Forward @optarg to cpu_set_log_filename() as the log file name. */
void set_cpu_log_filename(const char *optarg)
{
    const char *filename = optarg;

    cpu_set_log_filename(filename);
}
1090
1091 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1092 {
1093 /* XXX: implement xxx_cpu_list for targets that still miss it */
1094 #if defined(cpu_list_id)
1095 cpu_list_id(f, cpu_fprintf, optarg);
1096 #elif defined(cpu_list)
1097 cpu_list(f, cpu_fprintf); /* deprecated */
1098 #endif
1099 }
1100
1101 CpuInfoList *qmp_query_cpus(Error **errp)
1102 {
1103 CpuInfoList *head = NULL, *cur_item = NULL;
1104 CPUState *env;
1105
1106 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1107 CpuInfoList *info;
1108
1109 cpu_synchronize_state(env);
1110
1111 info = g_malloc0(sizeof(*info));
1112 info->value = g_malloc0(sizeof(*info->value));
1113 info->value->CPU = env->cpu_index;
1114 info->value->current = (env == first_cpu);
1115 info->value->halted = env->halted;
1116 info->value->thread_id = env->thread_id;
1117 #if defined(TARGET_I386)
1118 info->value->has_pc = true;
1119 info->value->pc = env->eip + env->segs[R_CS].base;
1120 #elif defined(TARGET_PPC)
1121 info->value->has_nip = true;
1122 info->value->nip = env->nip;
1123 #elif defined(TARGET_SPARC)
1124 info->value->has_pc = true;
1125 info->value->pc = env->pc;
1126 info->value->has_npc = true;
1127 info->value->npc = env->npc;
1128 #elif defined(TARGET_MIPS)
1129 info->value->has_PC = true;
1130 info->value->PC = env->active_tc.PC;
1131 #endif
1132
1133 /* XXX: waiting for the qapi to support GSList */
1134 if (!cur_item) {
1135 head = cur_item = info;
1136 } else {
1137 cur_item->next = info;
1138 cur_item = info;
1139 }
1140 }
1141
1142 return head;
1143 }
1144
1145 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1146 bool has_cpu, int64_t cpu_index, Error **errp)
1147 {
1148 FILE *f;
1149 uint32_t l;
1150 CPUState *env;
1151 uint8_t buf[1024];
1152
1153 if (!has_cpu) {
1154 cpu_index = 0;
1155 }
1156
1157 for (env = first_cpu; env; env = env->next_cpu) {
1158 if (cpu_index == env->cpu_index) {
1159 break;
1160 }
1161 }
1162
1163 if (env == NULL) {
1164 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1165 "a CPU number");
1166 return;
1167 }
1168
1169 f = fopen(filename, "wb");
1170 if (!f) {
1171 error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1172 return;
1173 }
1174
1175 while (size != 0) {
1176 l = sizeof(buf);
1177 if (l > size)
1178 l = size;
1179 cpu_memory_rw_debug(env, addr, buf, l, 0);
1180 if (fwrite(buf, 1, l, f) != l) {
1181 error_set(errp, QERR_IO_ERROR);
1182 goto exit;
1183 }
1184 addr += l;
1185 size -= l;
1186 }
1187
1188 exit:
1189 fclose(f);
1190 }
1191
1192 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1193 Error **errp)
1194 {
1195 FILE *f;
1196 uint32_t l;
1197 uint8_t buf[1024];
1198
1199 f = fopen(filename, "wb");
1200 if (!f) {
1201 error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1202 return;
1203 }
1204
1205 while (size != 0) {
1206 l = sizeof(buf);
1207 if (l > size)
1208 l = size;
1209 cpu_physical_memory_rw(addr, buf, l, 0);
1210 if (fwrite(buf, 1, l, f) != l) {
1211 error_set(errp, QERR_IO_ERROR);
1212 goto exit;
1213 }
1214 addr += l;
1215 size -= l;
1216 }
1217
1218 exit:
1219 fclose(f);
1220 }
1221
1222 void qmp_inject_nmi(Error **errp)
1223 {
1224 #if defined(TARGET_I386)
1225 CPUState *env;
1226
1227 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1228 cpu_interrupt(env, CPU_INTERRUPT_NMI);
1229 }
1230 #else
1231 error_set(errp, QERR_UNSUPPORTED);
1232 #endif
1233 }