]> git.proxmox.com Git - qemu.git/blob - cpus.c
hw/armv7m_nvic: Fix incorrect default for num-irqs property
[qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
27
28 #include "monitor.h"
29 #include "sysemu.h"
30 #include "gdbstub.h"
31 #include "dma.h"
32 #include "kvm.h"
33 #include "qmp-commands.h"
34
35 #include "qemu-thread.h"
36 #include "cpus.h"
37 #include "qtest.h"
38 #include "main-loop.h"
39 #include "bitmap.h"
40
41 #ifndef _WIN32
42 #include "compatfd.h"
43 #endif
44
45 #ifdef CONFIG_LINUX
46
47 #include <sys/prctl.h>
48
49 #ifndef PR_MCE_KILL
50 #define PR_MCE_KILL 33
51 #endif
52
53 #ifndef PR_MCE_KILL_SET
54 #define PR_MCE_KILL_SET 1
55 #endif
56
57 #ifndef PR_MCE_KILL_EARLY
58 #define PR_MCE_KILL_EARLY 1
59 #endif
60
61 #endif /* CONFIG_LINUX */
62
63 static CPUArchState *next_cpu;
64
65 static bool cpu_thread_is_idle(CPUArchState *env)
66 {
67 if (env->stop || env->queued_work_first) {
68 return false;
69 }
70 if (env->stopped || !runstate_is_running()) {
71 return true;
72 }
73 if (!env->halted || qemu_cpu_has_work(env) || kvm_irqchip_in_kernel()) {
74 return false;
75 }
76 return true;
77 }
78
79 static bool all_cpu_threads_idle(void)
80 {
81 CPUArchState *env;
82
83 for (env = first_cpu; env != NULL; env = env->next_cpu) {
84 if (!cpu_thread_is_idle(env)) {
85 return false;
86 }
87 }
88 return true;
89 }
90
91 /***********************************************************/
92 /* guest cycle counter */
93
94 /* Conversion factor from emulated instructions to virtual clock ticks. */
95 static int icount_time_shift;
96 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
97 #define MAX_ICOUNT_SHIFT 10
98 /* Compensate for varying guest execution speed. */
99 static int64_t qemu_icount_bias;
100 static QEMUTimer *icount_rt_timer;
101 static QEMUTimer *icount_vm_timer;
102 static QEMUTimer *icount_warp_timer;
103 static int64_t vm_clock_warp_start;
104 static int64_t qemu_icount;
105
/*
 * Bookkeeping used by cpu_get_ticks() / cpu_get_clock() and their
 * enable/disable helpers.
 */
typedef struct TimersState TimersState;

struct TimersState {
    int64_t cpu_ticks_prev;     /* last raw value read by cpu_get_ticks() */
    int64_t cpu_ticks_offset;   /* added to the raw host tick counter */
    int64_t cpu_clock_offset;   /* added to get_clock() */
    int32_t cpu_ticks_enabled;  /* non-zero while ticks and clock advance */
    int64_t dummy;              /* only referenced by vmstate_timers in the
                                   code visible here */
};

TimersState timers_state;
115
116 /* Return the virtual CPU time, based on the instruction counter. */
117 int64_t cpu_get_icount(void)
118 {
119 int64_t icount;
120 CPUArchState *env = cpu_single_env;
121
122 icount = qemu_icount;
123 if (env) {
124 if (!can_do_io(env)) {
125 fprintf(stderr, "Bad clock read\n");
126 }
127 icount -= (env->icount_decr.u16.low + env->icount_extra);
128 }
129 return qemu_icount_bias + (icount << icount_time_shift);
130 }
131
132 /* return the host CPU cycle counter and handle stop/restart */
133 int64_t cpu_get_ticks(void)
134 {
135 if (use_icount) {
136 return cpu_get_icount();
137 }
138 if (!timers_state.cpu_ticks_enabled) {
139 return timers_state.cpu_ticks_offset;
140 } else {
141 int64_t ticks;
142 ticks = cpu_get_real_ticks();
143 if (timers_state.cpu_ticks_prev > ticks) {
144 /* Note: non increasing ticks may happen if the host uses
145 software suspend */
146 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
147 }
148 timers_state.cpu_ticks_prev = ticks;
149 return ticks + timers_state.cpu_ticks_offset;
150 }
151 }
152
153 /* return the host CPU monotonic timer and handle stop/restart */
154 int64_t cpu_get_clock(void)
155 {
156 int64_t ti;
157 if (!timers_state.cpu_ticks_enabled) {
158 return timers_state.cpu_clock_offset;
159 } else {
160 ti = get_clock();
161 return ti + timers_state.cpu_clock_offset;
162 }
163 }
164
165 /* enable cpu_get_ticks() */
166 void cpu_enable_ticks(void)
167 {
168 if (!timers_state.cpu_ticks_enabled) {
169 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
170 timers_state.cpu_clock_offset -= get_clock();
171 timers_state.cpu_ticks_enabled = 1;
172 }
173 }
174
175 /* disable cpu_get_ticks() : the clock is stopped. You must not call
176 cpu_get_ticks() after that. */
177 void cpu_disable_ticks(void)
178 {
179 if (timers_state.cpu_ticks_enabled) {
180 timers_state.cpu_ticks_offset = cpu_get_ticks();
181 timers_state.cpu_clock_offset = cpu_get_clock();
182 timers_state.cpu_ticks_enabled = 0;
183 }
184 }
185
186 /* Correlation between real and virtual time is always going to be
187 fairly approximate, so ignore small variation.
188 When the guest is idle real and virtual time will be aligned in
189 the IO wait loop. */
190 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
191
192 static void icount_adjust(void)
193 {
194 int64_t cur_time;
195 int64_t cur_icount;
196 int64_t delta;
197 static int64_t last_delta;
198 /* If the VM is not running, then do nothing. */
199 if (!runstate_is_running()) {
200 return;
201 }
202 cur_time = cpu_get_clock();
203 cur_icount = qemu_get_clock_ns(vm_clock);
204 delta = cur_icount - cur_time;
205 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
206 if (delta > 0
207 && last_delta + ICOUNT_WOBBLE < delta * 2
208 && icount_time_shift > 0) {
209 /* The guest is getting too far ahead. Slow time down. */
210 icount_time_shift--;
211 }
212 if (delta < 0
213 && last_delta - ICOUNT_WOBBLE > delta * 2
214 && icount_time_shift < MAX_ICOUNT_SHIFT) {
215 /* The guest is getting too far behind. Speed time up. */
216 icount_time_shift++;
217 }
218 last_delta = delta;
219 qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
220 }
221
222 static void icount_adjust_rt(void *opaque)
223 {
224 qemu_mod_timer(icount_rt_timer,
225 qemu_get_clock_ms(rt_clock) + 1000);
226 icount_adjust();
227 }
228
229 static void icount_adjust_vm(void *opaque)
230 {
231 qemu_mod_timer(icount_vm_timer,
232 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
233 icount_adjust();
234 }
235
236 static int64_t qemu_icount_round(int64_t count)
237 {
238 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
239 }
240
241 static void icount_warp_rt(void *opaque)
242 {
243 if (vm_clock_warp_start == -1) {
244 return;
245 }
246
247 if (runstate_is_running()) {
248 int64_t clock = qemu_get_clock_ns(rt_clock);
249 int64_t warp_delta = clock - vm_clock_warp_start;
250 if (use_icount == 1) {
251 qemu_icount_bias += warp_delta;
252 } else {
253 /*
254 * In adaptive mode, do not let the vm_clock run too
255 * far ahead of real time.
256 */
257 int64_t cur_time = cpu_get_clock();
258 int64_t cur_icount = qemu_get_clock_ns(vm_clock);
259 int64_t delta = cur_time - cur_icount;
260 qemu_icount_bias += MIN(warp_delta, delta);
261 }
262 if (qemu_clock_expired(vm_clock)) {
263 qemu_notify_event();
264 }
265 }
266 vm_clock_warp_start = -1;
267 }
268
269 void qtest_clock_warp(int64_t dest)
270 {
271 int64_t clock = qemu_get_clock_ns(vm_clock);
272 assert(qtest_enabled());
273 while (clock < dest) {
274 int64_t deadline = qemu_clock_deadline(vm_clock);
275 int64_t warp = MIN(dest - clock, deadline);
276 qemu_icount_bias += warp;
277 qemu_run_timers(vm_clock);
278 clock = qemu_get_clock_ns(vm_clock);
279 }
280 qemu_notify_event();
281 }
282
283 void qemu_clock_warp(QEMUClock *clock)
284 {
285 int64_t deadline;
286
287 /*
288 * There are too many global variables to make the "warp" behavior
289 * applicable to other clocks. But a clock argument removes the
290 * need for if statements all over the place.
291 */
292 if (clock != vm_clock || !use_icount) {
293 return;
294 }
295
296 /*
297 * If the CPUs have been sleeping, advance the vm_clock timer now. This
298 * ensures that the deadline for the timer is computed correctly below.
299 * This also makes sure that the insn counter is synchronized before the
300 * CPU starts running, in case the CPU is woken by an event other than
301 * the earliest vm_clock timer.
302 */
303 icount_warp_rt(NULL);
304 if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
305 qemu_del_timer(icount_warp_timer);
306 return;
307 }
308
309 if (qtest_enabled()) {
310 /* When testing, qtest commands advance icount. */
311 return;
312 }
313
314 vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
315 deadline = qemu_clock_deadline(vm_clock);
316 if (deadline > 0) {
317 /*
318 * Ensure the vm_clock proceeds even when the virtual CPU goes to
319 * sleep. Otherwise, the CPU might be waiting for a future timer
320 * interrupt to wake it up, but the interrupt never comes because
321 * the vCPU isn't running any insns and thus doesn't advance the
322 * vm_clock.
323 *
324 * An extreme solution for this problem would be to never let VCPUs
325 * sleep in icount mode if there is a pending vm_clock timer; rather
326 * time could just advance to the next vm_clock event. Instead, we
327 * do stop VCPUs and only advance vm_clock after some "real" time,
328 * (related to the time left until the next event) has passed. This
329 * rt_clock timer will do this. This avoids that the warps are too
330 * visible externally---for example, you will not be sending network
331 * packets continuously instead of every 100ms.
332 */
333 qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
334 } else {
335 qemu_notify_event();
336 }
337 }
338
339 static const VMStateDescription vmstate_timers = {
340 .name = "timer",
341 .version_id = 2,
342 .minimum_version_id = 1,
343 .minimum_version_id_old = 1,
344 .fields = (VMStateField[]) {
345 VMSTATE_INT64(cpu_ticks_offset, TimersState),
346 VMSTATE_INT64(dummy, TimersState),
347 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
348 VMSTATE_END_OF_LIST()
349 }
350 };
351
352 void configure_icount(const char *option)
353 {
354 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
355 if (!option) {
356 return;
357 }
358
359 icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
360 if (strcmp(option, "auto") != 0) {
361 icount_time_shift = strtol(option, NULL, 0);
362 use_icount = 1;
363 return;
364 }
365
366 use_icount = 2;
367
368 /* 125MIPS seems a reasonable initial guess at the guest speed.
369 It will be corrected fairly quickly anyway. */
370 icount_time_shift = 3;
371
372 /* Have both realtime and virtual time triggers for speed adjustment.
373 The realtime trigger catches emulated time passing too slowly,
374 the virtual time trigger catches emulated time passing too fast.
375 Realtime triggers occur even when idle, so use them less frequently
376 than VM triggers. */
377 icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
378 qemu_mod_timer(icount_rt_timer,
379 qemu_get_clock_ms(rt_clock) + 1000);
380 icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
381 qemu_mod_timer(icount_vm_timer,
382 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
383 }
384
385 /***********************************************************/
386 void hw_error(const char *fmt, ...)
387 {
388 va_list ap;
389 CPUArchState *env;
390
391 va_start(ap, fmt);
392 fprintf(stderr, "qemu: hardware error: ");
393 vfprintf(stderr, fmt, ap);
394 fprintf(stderr, "\n");
395 for(env = first_cpu; env != NULL; env = env->next_cpu) {
396 fprintf(stderr, "CPU #%d:\n", env->cpu_index);
397 #ifdef TARGET_I386
398 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
399 #else
400 cpu_dump_state(env, stderr, fprintf, 0);
401 #endif
402 }
403 va_end(ap);
404 abort();
405 }
406
407 void cpu_synchronize_all_states(void)
408 {
409 CPUArchState *cpu;
410
411 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
412 cpu_synchronize_state(cpu);
413 }
414 }
415
416 void cpu_synchronize_all_post_reset(void)
417 {
418 CPUArchState *cpu;
419
420 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
421 cpu_synchronize_post_reset(cpu);
422 }
423 }
424
425 void cpu_synchronize_all_post_init(void)
426 {
427 CPUArchState *cpu;
428
429 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
430 cpu_synchronize_post_init(cpu);
431 }
432 }
433
434 int cpu_is_stopped(CPUArchState *env)
435 {
436 return !runstate_is_running() || env->stopped;
437 }
438
439 static void do_vm_stop(RunState state)
440 {
441 if (runstate_is_running()) {
442 cpu_disable_ticks();
443 pause_all_vcpus();
444 runstate_set(state);
445 vm_state_notify(0, state);
446 bdrv_drain_all();
447 bdrv_flush_all();
448 monitor_protocol_event(QEVENT_STOP, NULL);
449 }
450 }
451
452 static int cpu_can_run(CPUArchState *env)
453 {
454 if (env->stop) {
455 return 0;
456 }
457 if (env->stopped || !runstate_is_running()) {
458 return 0;
459 }
460 return 1;
461 }
462
463 static void cpu_handle_guest_debug(CPUArchState *env)
464 {
465 gdb_set_stop_cpu(env);
466 qemu_system_debug_request();
467 env->stopped = 1;
468 }
469
470 static void cpu_signal(int sig)
471 {
472 if (cpu_single_env) {
473 cpu_exit(cpu_single_env);
474 }
475 exit_request = 1;
476 }
477
478 #ifdef CONFIG_LINUX
479 static void sigbus_reraise(void)
480 {
481 sigset_t set;
482 struct sigaction action;
483
484 memset(&action, 0, sizeof(action));
485 action.sa_handler = SIG_DFL;
486 if (!sigaction(SIGBUS, &action, NULL)) {
487 raise(SIGBUS);
488 sigemptyset(&set);
489 sigaddset(&set, SIGBUS);
490 sigprocmask(SIG_UNBLOCK, &set, NULL);
491 }
492 perror("Failed to re-raise SIGBUS!\n");
493 abort();
494 }
495
496 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
497 void *ctx)
498 {
499 if (kvm_on_sigbus(siginfo->ssi_code,
500 (void *)(intptr_t)siginfo->ssi_addr)) {
501 sigbus_reraise();
502 }
503 }
504
505 static void qemu_init_sigbus(void)
506 {
507 struct sigaction action;
508
509 memset(&action, 0, sizeof(action));
510 action.sa_flags = SA_SIGINFO;
511 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
512 sigaction(SIGBUS, &action, NULL);
513
514 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
515 }
516
517 static void qemu_kvm_eat_signals(CPUArchState *env)
518 {
519 struct timespec ts = { 0, 0 };
520 siginfo_t siginfo;
521 sigset_t waitset;
522 sigset_t chkset;
523 int r;
524
525 sigemptyset(&waitset);
526 sigaddset(&waitset, SIG_IPI);
527 sigaddset(&waitset, SIGBUS);
528
529 do {
530 r = sigtimedwait(&waitset, &siginfo, &ts);
531 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
532 perror("sigtimedwait");
533 exit(1);
534 }
535
536 switch (r) {
537 case SIGBUS:
538 if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
539 sigbus_reraise();
540 }
541 break;
542 default:
543 break;
544 }
545
546 r = sigpending(&chkset);
547 if (r == -1) {
548 perror("sigpending");
549 exit(1);
550 }
551 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
552 }
553
554 #else /* !CONFIG_LINUX */
555
/* Stub for hosts without CONFIG_LINUX: no SIGBUS setup is done. */
static void qemu_init_sigbus(void)
{
    /* nothing to do */
}
559
560 static void qemu_kvm_eat_signals(CPUArchState *env)
561 {
562 }
563 #endif /* !CONFIG_LINUX */
564
565 #ifndef _WIN32
/* Signal handler that deliberately does nothing. */
static void dummy_signal(int sig)
{
    (void)sig;
}
569
570 static void qemu_kvm_init_cpu_signals(CPUArchState *env)
571 {
572 int r;
573 sigset_t set;
574 struct sigaction sigact;
575
576 memset(&sigact, 0, sizeof(sigact));
577 sigact.sa_handler = dummy_signal;
578 sigaction(SIG_IPI, &sigact, NULL);
579
580 pthread_sigmask(SIG_BLOCK, NULL, &set);
581 sigdelset(&set, SIG_IPI);
582 sigdelset(&set, SIGBUS);
583 r = kvm_set_signal_mask(env, &set);
584 if (r) {
585 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
586 exit(1);
587 }
588 }
589
590 static void qemu_tcg_init_cpu_signals(void)
591 {
592 sigset_t set;
593 struct sigaction sigact;
594
595 memset(&sigact, 0, sizeof(sigact));
596 sigact.sa_handler = cpu_signal;
597 sigaction(SIG_IPI, &sigact, NULL);
598
599 sigemptyset(&set);
600 sigaddset(&set, SIG_IPI);
601 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
602 }
603
604 #else /* _WIN32 */
605 static void qemu_kvm_init_cpu_signals(CPUArchState *env)
606 {
607 abort();
608 }
609
/* Windows build: no signal setup is done for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* nothing to do */
}
613 #endif /* _WIN32 */
614
615 QemuMutex qemu_global_mutex;
616 static QemuCond qemu_io_proceeded_cond;
617 static bool iothread_requesting_mutex;
618
619 static QemuThread io_thread;
620
621 static QemuThread *tcg_cpu_thread;
622 static QemuCond *tcg_halt_cond;
623
624 /* cpu creation */
625 static QemuCond qemu_cpu_cond;
626 /* system init */
627 static QemuCond qemu_pause_cond;
628 static QemuCond qemu_work_cond;
629
630 void qemu_init_cpu_loop(void)
631 {
632 qemu_init_sigbus();
633 qemu_cond_init(&qemu_cpu_cond);
634 qemu_cond_init(&qemu_pause_cond);
635 qemu_cond_init(&qemu_work_cond);
636 qemu_cond_init(&qemu_io_proceeded_cond);
637 qemu_mutex_init(&qemu_global_mutex);
638
639 qemu_thread_get_self(&io_thread);
640 }
641
642 void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
643 {
644 struct qemu_work_item wi;
645
646 if (qemu_cpu_is_self(env)) {
647 func(data);
648 return;
649 }
650
651 wi.func = func;
652 wi.data = data;
653 if (!env->queued_work_first) {
654 env->queued_work_first = &wi;
655 } else {
656 env->queued_work_last->next = &wi;
657 }
658 env->queued_work_last = &wi;
659 wi.next = NULL;
660 wi.done = false;
661
662 qemu_cpu_kick(env);
663 while (!wi.done) {
664 CPUArchState *self_env = cpu_single_env;
665
666 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
667 cpu_single_env = self_env;
668 }
669 }
670
671 static void flush_queued_work(CPUArchState *env)
672 {
673 struct qemu_work_item *wi;
674
675 if (!env->queued_work_first) {
676 return;
677 }
678
679 while ((wi = env->queued_work_first)) {
680 env->queued_work_first = wi->next;
681 wi->func(wi->data);
682 wi->done = true;
683 }
684 env->queued_work_last = NULL;
685 qemu_cond_broadcast(&qemu_work_cond);
686 }
687
688 static void qemu_wait_io_event_common(CPUArchState *env)
689 {
690 CPUState *cpu = ENV_GET_CPU(env);
691
692 if (env->stop) {
693 env->stop = 0;
694 env->stopped = 1;
695 qemu_cond_signal(&qemu_pause_cond);
696 }
697 flush_queued_work(env);
698 cpu->thread_kicked = false;
699 }
700
701 static void qemu_tcg_wait_io_event(void)
702 {
703 CPUArchState *env;
704
705 while (all_cpu_threads_idle()) {
706 /* Start accounting real time to the virtual clock if the CPUs
707 are idle. */
708 qemu_clock_warp(vm_clock);
709 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
710 }
711
712 while (iothread_requesting_mutex) {
713 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
714 }
715
716 for (env = first_cpu; env != NULL; env = env->next_cpu) {
717 qemu_wait_io_event_common(env);
718 }
719 }
720
721 static void qemu_kvm_wait_io_event(CPUArchState *env)
722 {
723 while (cpu_thread_is_idle(env)) {
724 qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
725 }
726
727 qemu_kvm_eat_signals(env);
728 qemu_wait_io_event_common(env);
729 }
730
731 static void *qemu_kvm_cpu_thread_fn(void *arg)
732 {
733 CPUArchState *env = arg;
734 CPUState *cpu = ENV_GET_CPU(env);
735 int r;
736
737 qemu_mutex_lock(&qemu_global_mutex);
738 qemu_thread_get_self(cpu->thread);
739 env->thread_id = qemu_get_thread_id();
740 cpu_single_env = env;
741
742 r = kvm_init_vcpu(env);
743 if (r < 0) {
744 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
745 exit(1);
746 }
747
748 qemu_kvm_init_cpu_signals(env);
749
750 /* signal CPU creation */
751 env->created = 1;
752 qemu_cond_signal(&qemu_cpu_cond);
753
754 while (1) {
755 if (cpu_can_run(env)) {
756 r = kvm_cpu_exec(env);
757 if (r == EXCP_DEBUG) {
758 cpu_handle_guest_debug(env);
759 }
760 }
761 qemu_kvm_wait_io_event(env);
762 }
763
764 return NULL;
765 }
766
767 static void *qemu_dummy_cpu_thread_fn(void *arg)
768 {
769 #ifdef _WIN32
770 fprintf(stderr, "qtest is not supported under Windows\n");
771 exit(1);
772 #else
773 CPUArchState *env = arg;
774 CPUState *cpu = ENV_GET_CPU(env);
775 sigset_t waitset;
776 int r;
777
778 qemu_mutex_lock_iothread();
779 qemu_thread_get_self(cpu->thread);
780 env->thread_id = qemu_get_thread_id();
781
782 sigemptyset(&waitset);
783 sigaddset(&waitset, SIG_IPI);
784
785 /* signal CPU creation */
786 env->created = 1;
787 qemu_cond_signal(&qemu_cpu_cond);
788
789 cpu_single_env = env;
790 while (1) {
791 cpu_single_env = NULL;
792 qemu_mutex_unlock_iothread();
793 do {
794 int sig;
795 r = sigwait(&waitset, &sig);
796 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
797 if (r == -1) {
798 perror("sigwait");
799 exit(1);
800 }
801 qemu_mutex_lock_iothread();
802 cpu_single_env = env;
803 qemu_wait_io_event_common(env);
804 }
805
806 return NULL;
807 #endif
808 }
809
810 static void tcg_exec_all(void);
811
812 static void *qemu_tcg_cpu_thread_fn(void *arg)
813 {
814 CPUArchState *env = arg;
815 CPUState *cpu = ENV_GET_CPU(env);
816
817 qemu_tcg_init_cpu_signals();
818 qemu_thread_get_self(cpu->thread);
819
820 /* signal CPU creation */
821 qemu_mutex_lock(&qemu_global_mutex);
822 for (env = first_cpu; env != NULL; env = env->next_cpu) {
823 env->thread_id = qemu_get_thread_id();
824 env->created = 1;
825 }
826 qemu_cond_signal(&qemu_cpu_cond);
827
828 /* wait for initial kick-off after machine start */
829 while (first_cpu->stopped) {
830 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
831
832 /* process any pending work */
833 for (env = first_cpu; env != NULL; env = env->next_cpu) {
834 qemu_wait_io_event_common(env);
835 }
836 }
837
838 while (1) {
839 tcg_exec_all();
840 if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
841 qemu_notify_event();
842 }
843 qemu_tcg_wait_io_event();
844 }
845
846 return NULL;
847 }
848
849 static void qemu_cpu_kick_thread(CPUArchState *env)
850 {
851 CPUState *cpu = ENV_GET_CPU(env);
852 #ifndef _WIN32
853 int err;
854
855 err = pthread_kill(cpu->thread->thread, SIG_IPI);
856 if (err) {
857 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
858 exit(1);
859 }
860 #else /* _WIN32 */
861 if (!qemu_cpu_is_self(env)) {
862 SuspendThread(cpu->hThread);
863 cpu_signal(0);
864 ResumeThread(cpu->hThread);
865 }
866 #endif
867 }
868
869 void qemu_cpu_kick(void *_env)
870 {
871 CPUArchState *env = _env;
872 CPUState *cpu = ENV_GET_CPU(env);
873
874 qemu_cond_broadcast(env->halt_cond);
875 if (!tcg_enabled() && !cpu->thread_kicked) {
876 qemu_cpu_kick_thread(env);
877 cpu->thread_kicked = true;
878 }
879 }
880
881 void qemu_cpu_kick_self(void)
882 {
883 #ifndef _WIN32
884 assert(cpu_single_env);
885 CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
886
887 if (!cpu_single_cpu->thread_kicked) {
888 qemu_cpu_kick_thread(cpu_single_env);
889 cpu_single_cpu->thread_kicked = true;
890 }
891 #else
892 abort();
893 #endif
894 }
895
896 int qemu_cpu_is_self(void *_env)
897 {
898 CPUArchState *env = _env;
899 CPUState *cpu = ENV_GET_CPU(env);
900
901 return qemu_thread_is_self(cpu->thread);
902 }
903
904 void qemu_mutex_lock_iothread(void)
905 {
906 if (!tcg_enabled()) {
907 qemu_mutex_lock(&qemu_global_mutex);
908 } else {
909 iothread_requesting_mutex = true;
910 if (qemu_mutex_trylock(&qemu_global_mutex)) {
911 qemu_cpu_kick_thread(first_cpu);
912 qemu_mutex_lock(&qemu_global_mutex);
913 }
914 iothread_requesting_mutex = false;
915 qemu_cond_broadcast(&qemu_io_proceeded_cond);
916 }
917 }
918
919 void qemu_mutex_unlock_iothread(void)
920 {
921 qemu_mutex_unlock(&qemu_global_mutex);
922 }
923
924 static int all_vcpus_paused(void)
925 {
926 CPUArchState *penv = first_cpu;
927
928 while (penv) {
929 if (!penv->stopped) {
930 return 0;
931 }
932 penv = penv->next_cpu;
933 }
934
935 return 1;
936 }
937
938 void pause_all_vcpus(void)
939 {
940 CPUArchState *penv = first_cpu;
941
942 qemu_clock_enable(vm_clock, false);
943 while (penv) {
944 penv->stop = 1;
945 qemu_cpu_kick(penv);
946 penv = penv->next_cpu;
947 }
948
949 if (!qemu_thread_is_self(&io_thread)) {
950 cpu_stop_current();
951 if (!kvm_enabled()) {
952 while (penv) {
953 penv->stop = 0;
954 penv->stopped = 1;
955 penv = penv->next_cpu;
956 }
957 return;
958 }
959 }
960
961 while (!all_vcpus_paused()) {
962 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
963 penv = first_cpu;
964 while (penv) {
965 qemu_cpu_kick(penv);
966 penv = penv->next_cpu;
967 }
968 }
969 }
970
971 void resume_all_vcpus(void)
972 {
973 CPUArchState *penv = first_cpu;
974
975 qemu_clock_enable(vm_clock, true);
976 while (penv) {
977 penv->stop = 0;
978 penv->stopped = 0;
979 qemu_cpu_kick(penv);
980 penv = penv->next_cpu;
981 }
982 }
983
984 static void qemu_tcg_init_vcpu(void *_env)
985 {
986 CPUArchState *env = _env;
987 CPUState *cpu = ENV_GET_CPU(env);
988
989 /* share a single thread for all cpus with TCG */
990 if (!tcg_cpu_thread) {
991 cpu->thread = g_malloc0(sizeof(QemuThread));
992 env->halt_cond = g_malloc0(sizeof(QemuCond));
993 qemu_cond_init(env->halt_cond);
994 tcg_halt_cond = env->halt_cond;
995 qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, env,
996 QEMU_THREAD_JOINABLE);
997 #ifdef _WIN32
998 cpu->hThread = qemu_thread_get_handle(cpu->thread);
999 #endif
1000 while (env->created == 0) {
1001 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1002 }
1003 tcg_cpu_thread = cpu->thread;
1004 } else {
1005 cpu->thread = tcg_cpu_thread;
1006 env->halt_cond = tcg_halt_cond;
1007 }
1008 }
1009
1010 static void qemu_kvm_start_vcpu(CPUArchState *env)
1011 {
1012 CPUState *cpu = ENV_GET_CPU(env);
1013
1014 cpu->thread = g_malloc0(sizeof(QemuThread));
1015 env->halt_cond = g_malloc0(sizeof(QemuCond));
1016 qemu_cond_init(env->halt_cond);
1017 qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, env,
1018 QEMU_THREAD_JOINABLE);
1019 while (env->created == 0) {
1020 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1021 }
1022 }
1023
1024 static void qemu_dummy_start_vcpu(CPUArchState *env)
1025 {
1026 CPUState *cpu = ENV_GET_CPU(env);
1027
1028 cpu->thread = g_malloc0(sizeof(QemuThread));
1029 env->halt_cond = g_malloc0(sizeof(QemuCond));
1030 qemu_cond_init(env->halt_cond);
1031 qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, env,
1032 QEMU_THREAD_JOINABLE);
1033 while (env->created == 0) {
1034 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1035 }
1036 }
1037
1038 void qemu_init_vcpu(void *_env)
1039 {
1040 CPUArchState *env = _env;
1041
1042 env->nr_cores = smp_cores;
1043 env->nr_threads = smp_threads;
1044 env->stopped = 1;
1045 if (kvm_enabled()) {
1046 qemu_kvm_start_vcpu(env);
1047 } else if (tcg_enabled()) {
1048 qemu_tcg_init_vcpu(env);
1049 } else {
1050 qemu_dummy_start_vcpu(env);
1051 }
1052 }
1053
1054 void cpu_stop_current(void)
1055 {
1056 if (cpu_single_env) {
1057 cpu_single_env->stop = 0;
1058 cpu_single_env->stopped = 1;
1059 cpu_exit(cpu_single_env);
1060 qemu_cond_signal(&qemu_pause_cond);
1061 }
1062 }
1063
1064 void vm_stop(RunState state)
1065 {
1066 if (!qemu_thread_is_self(&io_thread)) {
1067 qemu_system_vmstop_request(state);
1068 /*
1069 * FIXME: should not return to device code in case
1070 * vm_stop() has been requested.
1071 */
1072 cpu_stop_current();
1073 return;
1074 }
1075 do_vm_stop(state);
1076 }
1077
1078 /* does a state transition even if the VM is already stopped,
1079 current state is forgotten forever */
1080 void vm_stop_force_state(RunState state)
1081 {
1082 if (runstate_is_running()) {
1083 vm_stop(state);
1084 } else {
1085 runstate_set(state);
1086 }
1087 }
1088
1089 static int tcg_cpu_exec(CPUArchState *env)
1090 {
1091 int ret;
1092 #ifdef CONFIG_PROFILER
1093 int64_t ti;
1094 #endif
1095
1096 #ifdef CONFIG_PROFILER
1097 ti = profile_getclock();
1098 #endif
1099 if (use_icount) {
1100 int64_t count;
1101 int decr;
1102 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
1103 env->icount_decr.u16.low = 0;
1104 env->icount_extra = 0;
1105 count = qemu_icount_round(qemu_clock_deadline(vm_clock));
1106 qemu_icount += count;
1107 decr = (count > 0xffff) ? 0xffff : count;
1108 count -= decr;
1109 env->icount_decr.u16.low = decr;
1110 env->icount_extra = count;
1111 }
1112 ret = cpu_exec(env);
1113 #ifdef CONFIG_PROFILER
1114 qemu_time += profile_getclock() - ti;
1115 #endif
1116 if (use_icount) {
1117 /* Fold pending instructions back into the
1118 instruction counter, and clear the interrupt flag. */
1119 qemu_icount -= (env->icount_decr.u16.low
1120 + env->icount_extra);
1121 env->icount_decr.u32 = 0;
1122 env->icount_extra = 0;
1123 }
1124 return ret;
1125 }
1126
1127 static void tcg_exec_all(void)
1128 {
1129 int r;
1130
1131 /* Account partial waits to the vm_clock. */
1132 qemu_clock_warp(vm_clock);
1133
1134 if (next_cpu == NULL) {
1135 next_cpu = first_cpu;
1136 }
1137 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1138 CPUArchState *env = next_cpu;
1139
1140 qemu_clock_enable(vm_clock,
1141 (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1142
1143 if (cpu_can_run(env)) {
1144 r = tcg_cpu_exec(env);
1145 if (r == EXCP_DEBUG) {
1146 cpu_handle_guest_debug(env);
1147 break;
1148 }
1149 } else if (env->stop || env->stopped) {
1150 break;
1151 }
1152 }
1153 exit_request = 0;
1154 }
1155
1156 void set_numa_modes(void)
1157 {
1158 CPUArchState *env;
1159 int i;
1160
1161 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1162 for (i = 0; i < nb_numa_nodes; i++) {
1163 if (test_bit(env->cpu_index, node_cpumask[i])) {
1164 env->numa_node = i;
1165 }
1166 }
1167 }
1168 }
1169
1170 void set_cpu_log(const char *optarg)
1171 {
1172 int mask;
1173 const CPULogItem *item;
1174
1175 mask = cpu_str_to_log_mask(optarg);
1176 if (!mask) {
1177 printf("Log items (comma separated):\n");
1178 for (item = cpu_log_items; item->mask != 0; item++) {
1179 printf("%-10s %s\n", item->name, item->help);
1180 }
1181 exit(1);
1182 }
1183 cpu_set_log(mask);
1184 }
1185
/* Pass @optarg on to cpu_set_log_filename(). */
void set_cpu_log_filename(const char *optarg)
{
    const char *filename = optarg;

    cpu_set_log_filename(filename);
}
1190
1191 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1192 {
1193 /* XXX: implement xxx_cpu_list for targets that still miss it */
1194 #if defined(cpu_list_id)
1195 cpu_list_id(f, cpu_fprintf, optarg);
1196 #elif defined(cpu_list)
1197 cpu_list(f, cpu_fprintf); /* deprecated */
1198 #endif
1199 }
1200
1201 CpuInfoList *qmp_query_cpus(Error **errp)
1202 {
1203 CpuInfoList *head = NULL, *cur_item = NULL;
1204 CPUArchState *env;
1205
1206 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1207 CpuInfoList *info;
1208
1209 cpu_synchronize_state(env);
1210
1211 info = g_malloc0(sizeof(*info));
1212 info->value = g_malloc0(sizeof(*info->value));
1213 info->value->CPU = env->cpu_index;
1214 info->value->current = (env == first_cpu);
1215 info->value->halted = env->halted;
1216 info->value->thread_id = env->thread_id;
1217 #if defined(TARGET_I386)
1218 info->value->has_pc = true;
1219 info->value->pc = env->eip + env->segs[R_CS].base;
1220 #elif defined(TARGET_PPC)
1221 info->value->has_nip = true;
1222 info->value->nip = env->nip;
1223 #elif defined(TARGET_SPARC)
1224 info->value->has_pc = true;
1225 info->value->pc = env->pc;
1226 info->value->has_npc = true;
1227 info->value->npc = env->npc;
1228 #elif defined(TARGET_MIPS)
1229 info->value->has_PC = true;
1230 info->value->PC = env->active_tc.PC;
1231 #endif
1232
1233 /* XXX: waiting for the qapi to support GSList */
1234 if (!cur_item) {
1235 head = cur_item = info;
1236 } else {
1237 cur_item->next = info;
1238 cur_item = info;
1239 }
1240 }
1241
1242 return head;
1243 }
1244
1245 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1246 bool has_cpu, int64_t cpu_index, Error **errp)
1247 {
1248 FILE *f;
1249 uint32_t l;
1250 CPUArchState *env;
1251 uint8_t buf[1024];
1252
1253 if (!has_cpu) {
1254 cpu_index = 0;
1255 }
1256
1257 for (env = first_cpu; env; env = env->next_cpu) {
1258 if (cpu_index == env->cpu_index) {
1259 break;
1260 }
1261 }
1262
1263 if (env == NULL) {
1264 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1265 "a CPU number");
1266 return;
1267 }
1268
1269 f = fopen(filename, "wb");
1270 if (!f) {
1271 error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1272 return;
1273 }
1274
1275 while (size != 0) {
1276 l = sizeof(buf);
1277 if (l > size)
1278 l = size;
1279 cpu_memory_rw_debug(env, addr, buf, l, 0);
1280 if (fwrite(buf, 1, l, f) != l) {
1281 error_set(errp, QERR_IO_ERROR);
1282 goto exit;
1283 }
1284 addr += l;
1285 size -= l;
1286 }
1287
1288 exit:
1289 fclose(f);
1290 }
1291
1292 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1293 Error **errp)
1294 {
1295 FILE *f;
1296 uint32_t l;
1297 uint8_t buf[1024];
1298
1299 f = fopen(filename, "wb");
1300 if (!f) {
1301 error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1302 return;
1303 }
1304
1305 while (size != 0) {
1306 l = sizeof(buf);
1307 if (l > size)
1308 l = size;
1309 cpu_physical_memory_rw(addr, buf, l, 0);
1310 if (fwrite(buf, 1, l, f) != l) {
1311 error_set(errp, QERR_IO_ERROR);
1312 goto exit;
1313 }
1314 addr += l;
1315 size -= l;
1316 }
1317
1318 exit:
1319 fclose(f);
1320 }
1321
1322 void qmp_inject_nmi(Error **errp)
1323 {
1324 #if defined(TARGET_I386)
1325 CPUArchState *env;
1326
1327 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1328 if (!env->apic_state) {
1329 cpu_interrupt(env, CPU_INTERRUPT_NMI);
1330 } else {
1331 apic_deliver_nmi(env->apic_state);
1332 }
1333 }
1334 #else
1335 error_set(errp, QERR_UNSUPPORTED);
1336 #endif
1337 }