]> git.proxmox.com Git - qemu.git/blob - cpus.c
cpu: Move stopped field to CPUState
[qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
27
28 #include "monitor.h"
29 #include "sysemu.h"
30 #include "gdbstub.h"
31 #include "dma.h"
32 #include "kvm.h"
33 #include "qmp-commands.h"
34
35 #include "qemu-thread.h"
36 #include "cpus.h"
37 #include "qtest.h"
38 #include "main-loop.h"
39 #include "bitmap.h"
40
41 #ifndef _WIN32
42 #include "compatfd.h"
43 #endif
44
45 #ifdef CONFIG_LINUX
46
47 #include <sys/prctl.h>
48
49 #ifndef PR_MCE_KILL
50 #define PR_MCE_KILL 33
51 #endif
52
53 #ifndef PR_MCE_KILL_SET
54 #define PR_MCE_KILL_SET 1
55 #endif
56
57 #ifndef PR_MCE_KILL_EARLY
58 #define PR_MCE_KILL_EARLY 1
59 #endif
60
61 #endif /* CONFIG_LINUX */
62
63 static CPUArchState *next_cpu;
64
65 static bool cpu_thread_is_idle(CPUArchState *env)
66 {
67 CPUState *cpu = ENV_GET_CPU(env);
68
69 if (cpu->stop || env->queued_work_first) {
70 return false;
71 }
72 if (cpu->stopped || !runstate_is_running()) {
73 return true;
74 }
75 if (!env->halted || qemu_cpu_has_work(env) ||
76 kvm_async_interrupts_enabled()) {
77 return false;
78 }
79 return true;
80 }
81
82 static bool all_cpu_threads_idle(void)
83 {
84 CPUArchState *env;
85
86 for (env = first_cpu; env != NULL; env = env->next_cpu) {
87 if (!cpu_thread_is_idle(env)) {
88 return false;
89 }
90 }
91 return true;
92 }
93
94 /***********************************************************/
95 /* guest cycle counter */
96
97 /* Conversion factor from emulated instructions to virtual clock ticks. */
98 static int icount_time_shift;
99 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
100 #define MAX_ICOUNT_SHIFT 10
101 /* Compensate for varying guest execution speed. */
102 static int64_t qemu_icount_bias;
103 static QEMUTimer *icount_rt_timer;
104 static QEMUTimer *icount_vm_timer;
105 static QEMUTimer *icount_warp_timer;
106 static int64_t vm_clock_warp_start;
107 static int64_t qemu_icount;
108
/*
 * Bookkeeping behind cpu_get_ticks() and cpu_get_clock().
 *
 * cpu_ticks_offset, dummy and cpu_clock_offset are the fields listed in
 * vmstate_timers; cpu_ticks_prev and cpu_ticks_enabled are not.
 */
typedef struct TimersState {
    int64_t cpu_ticks_prev;    /* last raw value seen by cpu_get_ticks() */
    int64_t cpu_ticks_offset;  /* added to the raw host tick counter */
    int64_t cpu_clock_offset;  /* added to get_clock() */
    int32_t cpu_ticks_enabled; /* non-zero while the clocks are running */
    int64_t dummy;             /* not read or written by code in this file */
} TimersState;

TimersState timers_state;
118
119 /* Return the virtual CPU time, based on the instruction counter. */
120 int64_t cpu_get_icount(void)
121 {
122 int64_t icount;
123 CPUArchState *env = cpu_single_env;
124
125 icount = qemu_icount;
126 if (env) {
127 if (!can_do_io(env)) {
128 fprintf(stderr, "Bad clock read\n");
129 }
130 icount -= (env->icount_decr.u16.low + env->icount_extra);
131 }
132 return qemu_icount_bias + (icount << icount_time_shift);
133 }
134
135 /* return the host CPU cycle counter and handle stop/restart */
136 int64_t cpu_get_ticks(void)
137 {
138 if (use_icount) {
139 return cpu_get_icount();
140 }
141 if (!timers_state.cpu_ticks_enabled) {
142 return timers_state.cpu_ticks_offset;
143 } else {
144 int64_t ticks;
145 ticks = cpu_get_real_ticks();
146 if (timers_state.cpu_ticks_prev > ticks) {
147 /* Note: non increasing ticks may happen if the host uses
148 software suspend */
149 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
150 }
151 timers_state.cpu_ticks_prev = ticks;
152 return ticks + timers_state.cpu_ticks_offset;
153 }
154 }
155
156 /* return the host CPU monotonic timer and handle stop/restart */
157 int64_t cpu_get_clock(void)
158 {
159 int64_t ti;
160 if (!timers_state.cpu_ticks_enabled) {
161 return timers_state.cpu_clock_offset;
162 } else {
163 ti = get_clock();
164 return ti + timers_state.cpu_clock_offset;
165 }
166 }
167
168 /* enable cpu_get_ticks() */
169 void cpu_enable_ticks(void)
170 {
171 if (!timers_state.cpu_ticks_enabled) {
172 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
173 timers_state.cpu_clock_offset -= get_clock();
174 timers_state.cpu_ticks_enabled = 1;
175 }
176 }
177
178 /* disable cpu_get_ticks() : the clock is stopped. You must not call
179 cpu_get_ticks() after that. */
180 void cpu_disable_ticks(void)
181 {
182 if (timers_state.cpu_ticks_enabled) {
183 timers_state.cpu_ticks_offset = cpu_get_ticks();
184 timers_state.cpu_clock_offset = cpu_get_clock();
185 timers_state.cpu_ticks_enabled = 0;
186 }
187 }
188
189 /* Correlation between real and virtual time is always going to be
190 fairly approximate, so ignore small variation.
191 When the guest is idle real and virtual time will be aligned in
192 the IO wait loop. */
193 #define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
194
195 static void icount_adjust(void)
196 {
197 int64_t cur_time;
198 int64_t cur_icount;
199 int64_t delta;
200 static int64_t last_delta;
201 /* If the VM is not running, then do nothing. */
202 if (!runstate_is_running()) {
203 return;
204 }
205 cur_time = cpu_get_clock();
206 cur_icount = qemu_get_clock_ns(vm_clock);
207 delta = cur_icount - cur_time;
208 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
209 if (delta > 0
210 && last_delta + ICOUNT_WOBBLE < delta * 2
211 && icount_time_shift > 0) {
212 /* The guest is getting too far ahead. Slow time down. */
213 icount_time_shift--;
214 }
215 if (delta < 0
216 && last_delta - ICOUNT_WOBBLE > delta * 2
217 && icount_time_shift < MAX_ICOUNT_SHIFT) {
218 /* The guest is getting too far behind. Speed time up. */
219 icount_time_shift++;
220 }
221 last_delta = delta;
222 qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
223 }
224
225 static void icount_adjust_rt(void *opaque)
226 {
227 qemu_mod_timer(icount_rt_timer,
228 qemu_get_clock_ms(rt_clock) + 1000);
229 icount_adjust();
230 }
231
232 static void icount_adjust_vm(void *opaque)
233 {
234 qemu_mod_timer(icount_vm_timer,
235 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
236 icount_adjust();
237 }
238
239 static int64_t qemu_icount_round(int64_t count)
240 {
241 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
242 }
243
244 static void icount_warp_rt(void *opaque)
245 {
246 if (vm_clock_warp_start == -1) {
247 return;
248 }
249
250 if (runstate_is_running()) {
251 int64_t clock = qemu_get_clock_ns(rt_clock);
252 int64_t warp_delta = clock - vm_clock_warp_start;
253 if (use_icount == 1) {
254 qemu_icount_bias += warp_delta;
255 } else {
256 /*
257 * In adaptive mode, do not let the vm_clock run too
258 * far ahead of real time.
259 */
260 int64_t cur_time = cpu_get_clock();
261 int64_t cur_icount = qemu_get_clock_ns(vm_clock);
262 int64_t delta = cur_time - cur_icount;
263 qemu_icount_bias += MIN(warp_delta, delta);
264 }
265 if (qemu_clock_expired(vm_clock)) {
266 qemu_notify_event();
267 }
268 }
269 vm_clock_warp_start = -1;
270 }
271
272 void qtest_clock_warp(int64_t dest)
273 {
274 int64_t clock = qemu_get_clock_ns(vm_clock);
275 assert(qtest_enabled());
276 while (clock < dest) {
277 int64_t deadline = qemu_clock_deadline(vm_clock);
278 int64_t warp = MIN(dest - clock, deadline);
279 qemu_icount_bias += warp;
280 qemu_run_timers(vm_clock);
281 clock = qemu_get_clock_ns(vm_clock);
282 }
283 qemu_notify_event();
284 }
285
286 void qemu_clock_warp(QEMUClock *clock)
287 {
288 int64_t deadline;
289
290 /*
291 * There are too many global variables to make the "warp" behavior
292 * applicable to other clocks. But a clock argument removes the
293 * need for if statements all over the place.
294 */
295 if (clock != vm_clock || !use_icount) {
296 return;
297 }
298
299 /*
300 * If the CPUs have been sleeping, advance the vm_clock timer now. This
301 * ensures that the deadline for the timer is computed correctly below.
302 * This also makes sure that the insn counter is synchronized before the
303 * CPU starts running, in case the CPU is woken by an event other than
304 * the earliest vm_clock timer.
305 */
306 icount_warp_rt(NULL);
307 if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
308 qemu_del_timer(icount_warp_timer);
309 return;
310 }
311
312 if (qtest_enabled()) {
313 /* When testing, qtest commands advance icount. */
314 return;
315 }
316
317 vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
318 deadline = qemu_clock_deadline(vm_clock);
319 if (deadline > 0) {
320 /*
321 * Ensure the vm_clock proceeds even when the virtual CPU goes to
322 * sleep. Otherwise, the CPU might be waiting for a future timer
323 * interrupt to wake it up, but the interrupt never comes because
324 * the vCPU isn't running any insns and thus doesn't advance the
325 * vm_clock.
326 *
327 * An extreme solution for this problem would be to never let VCPUs
328 * sleep in icount mode if there is a pending vm_clock timer; rather
329 * time could just advance to the next vm_clock event. Instead, we
330 * do stop VCPUs and only advance vm_clock after some "real" time,
331 * (related to the time left until the next event) has passed. This
332 * rt_clock timer will do this. This avoids that the warps are too
333 * visible externally---for example, you will not be sending network
334 * packets continuously instead of every 100ms.
335 */
336 qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
337 } else {
338 qemu_notify_event();
339 }
340 }
341
342 static const VMStateDescription vmstate_timers = {
343 .name = "timer",
344 .version_id = 2,
345 .minimum_version_id = 1,
346 .minimum_version_id_old = 1,
347 .fields = (VMStateField[]) {
348 VMSTATE_INT64(cpu_ticks_offset, TimersState),
349 VMSTATE_INT64(dummy, TimersState),
350 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
351 VMSTATE_END_OF_LIST()
352 }
353 };
354
355 void configure_icount(const char *option)
356 {
357 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
358 if (!option) {
359 return;
360 }
361
362 icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
363 if (strcmp(option, "auto") != 0) {
364 icount_time_shift = strtol(option, NULL, 0);
365 use_icount = 1;
366 return;
367 }
368
369 use_icount = 2;
370
371 /* 125MIPS seems a reasonable initial guess at the guest speed.
372 It will be corrected fairly quickly anyway. */
373 icount_time_shift = 3;
374
375 /* Have both realtime and virtual time triggers for speed adjustment.
376 The realtime trigger catches emulated time passing too slowly,
377 the virtual time trigger catches emulated time passing too fast.
378 Realtime triggers occur even when idle, so use them less frequently
379 than VM triggers. */
380 icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
381 qemu_mod_timer(icount_rt_timer,
382 qemu_get_clock_ms(rt_clock) + 1000);
383 icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
384 qemu_mod_timer(icount_vm_timer,
385 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
386 }
387
388 /***********************************************************/
389 void hw_error(const char *fmt, ...)
390 {
391 va_list ap;
392 CPUArchState *env;
393
394 va_start(ap, fmt);
395 fprintf(stderr, "qemu: hardware error: ");
396 vfprintf(stderr, fmt, ap);
397 fprintf(stderr, "\n");
398 for(env = first_cpu; env != NULL; env = env->next_cpu) {
399 fprintf(stderr, "CPU #%d:\n", env->cpu_index);
400 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU);
401 }
402 va_end(ap);
403 abort();
404 }
405
406 void cpu_synchronize_all_states(void)
407 {
408 CPUArchState *cpu;
409
410 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
411 cpu_synchronize_state(cpu);
412 }
413 }
414
415 void cpu_synchronize_all_post_reset(void)
416 {
417 CPUArchState *cpu;
418
419 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
420 cpu_synchronize_post_reset(cpu);
421 }
422 }
423
424 void cpu_synchronize_all_post_init(void)
425 {
426 CPUArchState *cpu;
427
428 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
429 cpu_synchronize_post_init(cpu);
430 }
431 }
432
433 int cpu_is_stopped(CPUArchState *env)
434 {
435 CPUState *cpu = ENV_GET_CPU(env);
436
437 return !runstate_is_running() || cpu->stopped;
438 }
439
440 static void do_vm_stop(RunState state)
441 {
442 if (runstate_is_running()) {
443 cpu_disable_ticks();
444 pause_all_vcpus();
445 runstate_set(state);
446 vm_state_notify(0, state);
447 bdrv_drain_all();
448 bdrv_flush_all();
449 monitor_protocol_event(QEVENT_STOP, NULL);
450 }
451 }
452
453 static int cpu_can_run(CPUArchState *env)
454 {
455 CPUState *cpu = ENV_GET_CPU(env);
456
457 if (cpu->stop) {
458 return 0;
459 }
460 if (cpu->stopped || !runstate_is_running()) {
461 return 0;
462 }
463 return 1;
464 }
465
466 static void cpu_handle_guest_debug(CPUArchState *env)
467 {
468 CPUState *cpu = ENV_GET_CPU(env);
469
470 gdb_set_stop_cpu(env);
471 qemu_system_debug_request();
472 cpu->stopped = true;
473 }
474
475 static void cpu_signal(int sig)
476 {
477 if (cpu_single_env) {
478 cpu_exit(cpu_single_env);
479 }
480 exit_request = 1;
481 }
482
483 #ifdef CONFIG_LINUX
484 static void sigbus_reraise(void)
485 {
486 sigset_t set;
487 struct sigaction action;
488
489 memset(&action, 0, sizeof(action));
490 action.sa_handler = SIG_DFL;
491 if (!sigaction(SIGBUS, &action, NULL)) {
492 raise(SIGBUS);
493 sigemptyset(&set);
494 sigaddset(&set, SIGBUS);
495 sigprocmask(SIG_UNBLOCK, &set, NULL);
496 }
497 perror("Failed to re-raise SIGBUS!\n");
498 abort();
499 }
500
501 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
502 void *ctx)
503 {
504 if (kvm_on_sigbus(siginfo->ssi_code,
505 (void *)(intptr_t)siginfo->ssi_addr)) {
506 sigbus_reraise();
507 }
508 }
509
510 static void qemu_init_sigbus(void)
511 {
512 struct sigaction action;
513
514 memset(&action, 0, sizeof(action));
515 action.sa_flags = SA_SIGINFO;
516 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
517 sigaction(SIGBUS, &action, NULL);
518
519 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
520 }
521
522 static void qemu_kvm_eat_signals(CPUArchState *env)
523 {
524 struct timespec ts = { 0, 0 };
525 siginfo_t siginfo;
526 sigset_t waitset;
527 sigset_t chkset;
528 int r;
529
530 sigemptyset(&waitset);
531 sigaddset(&waitset, SIG_IPI);
532 sigaddset(&waitset, SIGBUS);
533
534 do {
535 r = sigtimedwait(&waitset, &siginfo, &ts);
536 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
537 perror("sigtimedwait");
538 exit(1);
539 }
540
541 switch (r) {
542 case SIGBUS:
543 if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
544 sigbus_reraise();
545 }
546 break;
547 default:
548 break;
549 }
550
551 r = sigpending(&chkset);
552 if (r == -1) {
553 perror("sigpending");
554 exit(1);
555 }
556 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
557 }
558
559 #else /* !CONFIG_LINUX */
560
/* Non-Linux build: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
564
565 static void qemu_kvm_eat_signals(CPUArchState *env)
566 {
567 }
568 #endif /* !CONFIG_LINUX */
569
570 #ifndef _WIN32
/* No-op handler; qemu_kvm_init_cpu_signals() installs it for SIG_IPI. */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
574
575 static void qemu_kvm_init_cpu_signals(CPUArchState *env)
576 {
577 int r;
578 sigset_t set;
579 struct sigaction sigact;
580
581 memset(&sigact, 0, sizeof(sigact));
582 sigact.sa_handler = dummy_signal;
583 sigaction(SIG_IPI, &sigact, NULL);
584
585 pthread_sigmask(SIG_BLOCK, NULL, &set);
586 sigdelset(&set, SIG_IPI);
587 sigdelset(&set, SIGBUS);
588 r = kvm_set_signal_mask(env, &set);
589 if (r) {
590 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
591 exit(1);
592 }
593 }
594
595 static void qemu_tcg_init_cpu_signals(void)
596 {
597 sigset_t set;
598 struct sigaction sigact;
599
600 memset(&sigact, 0, sizeof(sigact));
601 sigact.sa_handler = cpu_signal;
602 sigaction(SIG_IPI, &sigact, NULL);
603
604 sigemptyset(&set);
605 sigaddset(&set, SIG_IPI);
606 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
607 }
608
609 #else /* _WIN32 */
610 static void qemu_kvm_init_cpu_signals(CPUArchState *env)
611 {
612 abort();
613 }
614
/* Windows build: no signal setup is performed for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* intentionally empty */
}
618 #endif /* _WIN32 */
619
620 static QemuMutex qemu_global_mutex;
621 static QemuCond qemu_io_proceeded_cond;
622 static bool iothread_requesting_mutex;
623
624 static QemuThread io_thread;
625
626 static QemuThread *tcg_cpu_thread;
627 static QemuCond *tcg_halt_cond;
628
629 /* cpu creation */
630 static QemuCond qemu_cpu_cond;
631 /* system init */
632 static QemuCond qemu_pause_cond;
633 static QemuCond qemu_work_cond;
634
635 void qemu_init_cpu_loop(void)
636 {
637 qemu_init_sigbus();
638 qemu_cond_init(&qemu_cpu_cond);
639 qemu_cond_init(&qemu_pause_cond);
640 qemu_cond_init(&qemu_work_cond);
641 qemu_cond_init(&qemu_io_proceeded_cond);
642 qemu_mutex_init(&qemu_global_mutex);
643
644 qemu_thread_get_self(&io_thread);
645 }
646
647 void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
648 {
649 CPUState *cpu = ENV_GET_CPU(env);
650 struct qemu_work_item wi;
651
652 if (qemu_cpu_is_self(cpu)) {
653 func(data);
654 return;
655 }
656
657 wi.func = func;
658 wi.data = data;
659 if (!env->queued_work_first) {
660 env->queued_work_first = &wi;
661 } else {
662 env->queued_work_last->next = &wi;
663 }
664 env->queued_work_last = &wi;
665 wi.next = NULL;
666 wi.done = false;
667
668 qemu_cpu_kick(env);
669 while (!wi.done) {
670 CPUArchState *self_env = cpu_single_env;
671
672 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
673 cpu_single_env = self_env;
674 }
675 }
676
677 static void flush_queued_work(CPUArchState *env)
678 {
679 struct qemu_work_item *wi;
680
681 if (!env->queued_work_first) {
682 return;
683 }
684
685 while ((wi = env->queued_work_first)) {
686 env->queued_work_first = wi->next;
687 wi->func(wi->data);
688 wi->done = true;
689 }
690 env->queued_work_last = NULL;
691 qemu_cond_broadcast(&qemu_work_cond);
692 }
693
694 static void qemu_wait_io_event_common(CPUArchState *env)
695 {
696 CPUState *cpu = ENV_GET_CPU(env);
697
698 if (cpu->stop) {
699 cpu->stop = false;
700 cpu->stopped = true;
701 qemu_cond_signal(&qemu_pause_cond);
702 }
703 flush_queued_work(env);
704 cpu->thread_kicked = false;
705 }
706
707 static void qemu_tcg_wait_io_event(void)
708 {
709 CPUArchState *env;
710
711 while (all_cpu_threads_idle()) {
712 /* Start accounting real time to the virtual clock if the CPUs
713 are idle. */
714 qemu_clock_warp(vm_clock);
715 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
716 }
717
718 while (iothread_requesting_mutex) {
719 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
720 }
721
722 for (env = first_cpu; env != NULL; env = env->next_cpu) {
723 qemu_wait_io_event_common(env);
724 }
725 }
726
727 static void qemu_kvm_wait_io_event(CPUArchState *env)
728 {
729 while (cpu_thread_is_idle(env)) {
730 qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
731 }
732
733 qemu_kvm_eat_signals(env);
734 qemu_wait_io_event_common(env);
735 }
736
737 static void *qemu_kvm_cpu_thread_fn(void *arg)
738 {
739 CPUArchState *env = arg;
740 CPUState *cpu = ENV_GET_CPU(env);
741 int r;
742
743 qemu_mutex_lock(&qemu_global_mutex);
744 qemu_thread_get_self(cpu->thread);
745 env->thread_id = qemu_get_thread_id();
746 cpu_single_env = env;
747
748 r = kvm_init_vcpu(env);
749 if (r < 0) {
750 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
751 exit(1);
752 }
753
754 qemu_kvm_init_cpu_signals(env);
755
756 /* signal CPU creation */
757 cpu->created = true;
758 qemu_cond_signal(&qemu_cpu_cond);
759
760 while (1) {
761 if (cpu_can_run(env)) {
762 r = kvm_cpu_exec(env);
763 if (r == EXCP_DEBUG) {
764 cpu_handle_guest_debug(env);
765 }
766 }
767 qemu_kvm_wait_io_event(env);
768 }
769
770 return NULL;
771 }
772
773 static void *qemu_dummy_cpu_thread_fn(void *arg)
774 {
775 #ifdef _WIN32
776 fprintf(stderr, "qtest is not supported under Windows\n");
777 exit(1);
778 #else
779 CPUArchState *env = arg;
780 CPUState *cpu = ENV_GET_CPU(env);
781 sigset_t waitset;
782 int r;
783
784 qemu_mutex_lock_iothread();
785 qemu_thread_get_self(cpu->thread);
786 env->thread_id = qemu_get_thread_id();
787
788 sigemptyset(&waitset);
789 sigaddset(&waitset, SIG_IPI);
790
791 /* signal CPU creation */
792 cpu->created = true;
793 qemu_cond_signal(&qemu_cpu_cond);
794
795 cpu_single_env = env;
796 while (1) {
797 cpu_single_env = NULL;
798 qemu_mutex_unlock_iothread();
799 do {
800 int sig;
801 r = sigwait(&waitset, &sig);
802 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
803 if (r == -1) {
804 perror("sigwait");
805 exit(1);
806 }
807 qemu_mutex_lock_iothread();
808 cpu_single_env = env;
809 qemu_wait_io_event_common(env);
810 }
811
812 return NULL;
813 #endif
814 }
815
816 static void tcg_exec_all(void);
817
818 static void *qemu_tcg_cpu_thread_fn(void *arg)
819 {
820 CPUArchState *env = arg;
821 CPUState *cpu = ENV_GET_CPU(env);
822
823 qemu_tcg_init_cpu_signals();
824 qemu_thread_get_self(cpu->thread);
825
826 /* signal CPU creation */
827 qemu_mutex_lock(&qemu_global_mutex);
828 for (env = first_cpu; env != NULL; env = env->next_cpu) {
829 cpu = ENV_GET_CPU(env);
830 env->thread_id = qemu_get_thread_id();
831 cpu->created = true;
832 }
833 qemu_cond_signal(&qemu_cpu_cond);
834
835 /* wait for initial kick-off after machine start */
836 while (ENV_GET_CPU(first_cpu)->stopped) {
837 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
838
839 /* process any pending work */
840 for (env = first_cpu; env != NULL; env = env->next_cpu) {
841 qemu_wait_io_event_common(env);
842 }
843 }
844
845 while (1) {
846 tcg_exec_all();
847 if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
848 qemu_notify_event();
849 }
850 qemu_tcg_wait_io_event();
851 }
852
853 return NULL;
854 }
855
856 static void qemu_cpu_kick_thread(CPUState *cpu)
857 {
858 #ifndef _WIN32
859 int err;
860
861 err = pthread_kill(cpu->thread->thread, SIG_IPI);
862 if (err) {
863 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
864 exit(1);
865 }
866 #else /* _WIN32 */
867 if (!qemu_cpu_is_self(cpu)) {
868 SuspendThread(cpu->hThread);
869 cpu_signal(0);
870 ResumeThread(cpu->hThread);
871 }
872 #endif
873 }
874
875 void qemu_cpu_kick(void *_env)
876 {
877 CPUArchState *env = _env;
878 CPUState *cpu = ENV_GET_CPU(env);
879
880 qemu_cond_broadcast(env->halt_cond);
881 if (!tcg_enabled() && !cpu->thread_kicked) {
882 qemu_cpu_kick_thread(cpu);
883 cpu->thread_kicked = true;
884 }
885 }
886
887 void qemu_cpu_kick_self(void)
888 {
889 #ifndef _WIN32
890 assert(cpu_single_env);
891 CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
892
893 if (!cpu_single_cpu->thread_kicked) {
894 qemu_cpu_kick_thread(cpu_single_cpu);
895 cpu_single_cpu->thread_kicked = true;
896 }
897 #else
898 abort();
899 #endif
900 }
901
902 bool qemu_cpu_is_self(CPUState *cpu)
903 {
904 return qemu_thread_is_self(cpu->thread);
905 }
906
907 static bool qemu_in_vcpu_thread(void)
908 {
909 return cpu_single_env && qemu_cpu_is_self(ENV_GET_CPU(cpu_single_env));
910 }
911
912 void qemu_mutex_lock_iothread(void)
913 {
914 if (!tcg_enabled()) {
915 qemu_mutex_lock(&qemu_global_mutex);
916 } else {
917 iothread_requesting_mutex = true;
918 if (qemu_mutex_trylock(&qemu_global_mutex)) {
919 qemu_cpu_kick_thread(ENV_GET_CPU(first_cpu));
920 qemu_mutex_lock(&qemu_global_mutex);
921 }
922 iothread_requesting_mutex = false;
923 qemu_cond_broadcast(&qemu_io_proceeded_cond);
924 }
925 }
926
927 void qemu_mutex_unlock_iothread(void)
928 {
929 qemu_mutex_unlock(&qemu_global_mutex);
930 }
931
932 static int all_vcpus_paused(void)
933 {
934 CPUArchState *penv = first_cpu;
935
936 while (penv) {
937 CPUState *pcpu = ENV_GET_CPU(penv);
938 if (!pcpu->stopped) {
939 return 0;
940 }
941 penv = penv->next_cpu;
942 }
943
944 return 1;
945 }
946
947 void pause_all_vcpus(void)
948 {
949 CPUArchState *penv = first_cpu;
950
951 qemu_clock_enable(vm_clock, false);
952 while (penv) {
953 CPUState *pcpu = ENV_GET_CPU(penv);
954 pcpu->stop = true;
955 qemu_cpu_kick(penv);
956 penv = penv->next_cpu;
957 }
958
959 if (qemu_in_vcpu_thread()) {
960 cpu_stop_current();
961 if (!kvm_enabled()) {
962 while (penv) {
963 CPUState *pcpu = ENV_GET_CPU(penv);
964 pcpu->stop = 0;
965 pcpu->stopped = true;
966 penv = penv->next_cpu;
967 }
968 return;
969 }
970 }
971
972 while (!all_vcpus_paused()) {
973 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
974 penv = first_cpu;
975 while (penv) {
976 qemu_cpu_kick(penv);
977 penv = penv->next_cpu;
978 }
979 }
980 }
981
982 void resume_all_vcpus(void)
983 {
984 CPUArchState *penv = first_cpu;
985
986 qemu_clock_enable(vm_clock, true);
987 while (penv) {
988 CPUState *pcpu = ENV_GET_CPU(penv);
989 pcpu->stop = false;
990 pcpu->stopped = false;
991 qemu_cpu_kick(penv);
992 penv = penv->next_cpu;
993 }
994 }
995
996 static void qemu_tcg_init_vcpu(void *_env)
997 {
998 CPUArchState *env = _env;
999 CPUState *cpu = ENV_GET_CPU(env);
1000
1001 /* share a single thread for all cpus with TCG */
1002 if (!tcg_cpu_thread) {
1003 cpu->thread = g_malloc0(sizeof(QemuThread));
1004 env->halt_cond = g_malloc0(sizeof(QemuCond));
1005 qemu_cond_init(env->halt_cond);
1006 tcg_halt_cond = env->halt_cond;
1007 qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, env,
1008 QEMU_THREAD_JOINABLE);
1009 #ifdef _WIN32
1010 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1011 #endif
1012 while (!cpu->created) {
1013 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1014 }
1015 tcg_cpu_thread = cpu->thread;
1016 } else {
1017 cpu->thread = tcg_cpu_thread;
1018 env->halt_cond = tcg_halt_cond;
1019 }
1020 }
1021
1022 static void qemu_kvm_start_vcpu(CPUArchState *env)
1023 {
1024 CPUState *cpu = ENV_GET_CPU(env);
1025
1026 cpu->thread = g_malloc0(sizeof(QemuThread));
1027 env->halt_cond = g_malloc0(sizeof(QemuCond));
1028 qemu_cond_init(env->halt_cond);
1029 qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, env,
1030 QEMU_THREAD_JOINABLE);
1031 while (!cpu->created) {
1032 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1033 }
1034 }
1035
1036 static void qemu_dummy_start_vcpu(CPUArchState *env)
1037 {
1038 CPUState *cpu = ENV_GET_CPU(env);
1039
1040 cpu->thread = g_malloc0(sizeof(QemuThread));
1041 env->halt_cond = g_malloc0(sizeof(QemuCond));
1042 qemu_cond_init(env->halt_cond);
1043 qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, env,
1044 QEMU_THREAD_JOINABLE);
1045 while (!cpu->created) {
1046 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1047 }
1048 }
1049
1050 void qemu_init_vcpu(void *_env)
1051 {
1052 CPUArchState *env = _env;
1053 CPUState *cpu = ENV_GET_CPU(env);
1054
1055 env->nr_cores = smp_cores;
1056 env->nr_threads = smp_threads;
1057 cpu->stopped = true;
1058 if (kvm_enabled()) {
1059 qemu_kvm_start_vcpu(env);
1060 } else if (tcg_enabled()) {
1061 qemu_tcg_init_vcpu(env);
1062 } else {
1063 qemu_dummy_start_vcpu(env);
1064 }
1065 }
1066
1067 void cpu_stop_current(void)
1068 {
1069 if (cpu_single_env) {
1070 CPUState *cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
1071 cpu_single_cpu->stop = false;
1072 cpu_single_cpu->stopped = true;
1073 cpu_exit(cpu_single_env);
1074 qemu_cond_signal(&qemu_pause_cond);
1075 }
1076 }
1077
1078 void vm_stop(RunState state)
1079 {
1080 if (qemu_in_vcpu_thread()) {
1081 qemu_system_vmstop_request(state);
1082 /*
1083 * FIXME: should not return to device code in case
1084 * vm_stop() has been requested.
1085 */
1086 cpu_stop_current();
1087 return;
1088 }
1089 do_vm_stop(state);
1090 }
1091
1092 /* does a state transition even if the VM is already stopped,
1093 current state is forgotten forever */
1094 void vm_stop_force_state(RunState state)
1095 {
1096 if (runstate_is_running()) {
1097 vm_stop(state);
1098 } else {
1099 runstate_set(state);
1100 }
1101 }
1102
1103 static int tcg_cpu_exec(CPUArchState *env)
1104 {
1105 int ret;
1106 #ifdef CONFIG_PROFILER
1107 int64_t ti;
1108 #endif
1109
1110 #ifdef CONFIG_PROFILER
1111 ti = profile_getclock();
1112 #endif
1113 if (use_icount) {
1114 int64_t count;
1115 int decr;
1116 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
1117 env->icount_decr.u16.low = 0;
1118 env->icount_extra = 0;
1119 count = qemu_icount_round(qemu_clock_deadline(vm_clock));
1120 qemu_icount += count;
1121 decr = (count > 0xffff) ? 0xffff : count;
1122 count -= decr;
1123 env->icount_decr.u16.low = decr;
1124 env->icount_extra = count;
1125 }
1126 ret = cpu_exec(env);
1127 #ifdef CONFIG_PROFILER
1128 qemu_time += profile_getclock() - ti;
1129 #endif
1130 if (use_icount) {
1131 /* Fold pending instructions back into the
1132 instruction counter, and clear the interrupt flag. */
1133 qemu_icount -= (env->icount_decr.u16.low
1134 + env->icount_extra);
1135 env->icount_decr.u32 = 0;
1136 env->icount_extra = 0;
1137 }
1138 return ret;
1139 }
1140
1141 static void tcg_exec_all(void)
1142 {
1143 int r;
1144
1145 /* Account partial waits to the vm_clock. */
1146 qemu_clock_warp(vm_clock);
1147
1148 if (next_cpu == NULL) {
1149 next_cpu = first_cpu;
1150 }
1151 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1152 CPUArchState *env = next_cpu;
1153 CPUState *cpu = ENV_GET_CPU(env);
1154
1155 qemu_clock_enable(vm_clock,
1156 (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
1157
1158 if (cpu_can_run(env)) {
1159 r = tcg_cpu_exec(env);
1160 if (r == EXCP_DEBUG) {
1161 cpu_handle_guest_debug(env);
1162 break;
1163 }
1164 } else if (cpu->stop || cpu->stopped) {
1165 break;
1166 }
1167 }
1168 exit_request = 0;
1169 }
1170
1171 void set_numa_modes(void)
1172 {
1173 CPUArchState *env;
1174 int i;
1175
1176 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1177 for (i = 0; i < nb_numa_nodes; i++) {
1178 if (test_bit(env->cpu_index, node_cpumask[i])) {
1179 env->numa_node = i;
1180 }
1181 }
1182 }
1183 }
1184
1185 void set_cpu_log(const char *optarg)
1186 {
1187 int mask;
1188 const CPULogItem *item;
1189
1190 mask = cpu_str_to_log_mask(optarg);
1191 if (!mask) {
1192 printf("Log items (comma separated):\n");
1193 for (item = cpu_log_items; item->mask != 0; item++) {
1194 printf("%-10s %s\n", item->name, item->help);
1195 }
1196 exit(1);
1197 }
1198 cpu_set_log(mask);
1199 }
1200
/* Forward @optarg unchanged to cpu_set_log_filename(). */
void set_cpu_log_filename(const char *optarg)
{
    const char *path = optarg;

    cpu_set_log_filename(path);
}
1205
1206 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1207 {
1208 /* XXX: implement xxx_cpu_list for targets that still miss it */
1209 #if defined(cpu_list)
1210 cpu_list(f, cpu_fprintf);
1211 #endif
1212 }
1213
1214 CpuInfoList *qmp_query_cpus(Error **errp)
1215 {
1216 CpuInfoList *head = NULL, *cur_item = NULL;
1217 CPUArchState *env;
1218
1219 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1220 CpuInfoList *info;
1221
1222 cpu_synchronize_state(env);
1223
1224 info = g_malloc0(sizeof(*info));
1225 info->value = g_malloc0(sizeof(*info->value));
1226 info->value->CPU = env->cpu_index;
1227 info->value->current = (env == first_cpu);
1228 info->value->halted = env->halted;
1229 info->value->thread_id = env->thread_id;
1230 #if defined(TARGET_I386)
1231 info->value->has_pc = true;
1232 info->value->pc = env->eip + env->segs[R_CS].base;
1233 #elif defined(TARGET_PPC)
1234 info->value->has_nip = true;
1235 info->value->nip = env->nip;
1236 #elif defined(TARGET_SPARC)
1237 info->value->has_pc = true;
1238 info->value->pc = env->pc;
1239 info->value->has_npc = true;
1240 info->value->npc = env->npc;
1241 #elif defined(TARGET_MIPS)
1242 info->value->has_PC = true;
1243 info->value->PC = env->active_tc.PC;
1244 #endif
1245
1246 /* XXX: waiting for the qapi to support GSList */
1247 if (!cur_item) {
1248 head = cur_item = info;
1249 } else {
1250 cur_item->next = info;
1251 cur_item = info;
1252 }
1253 }
1254
1255 return head;
1256 }
1257
1258 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1259 bool has_cpu, int64_t cpu_index, Error **errp)
1260 {
1261 FILE *f;
1262 uint32_t l;
1263 CPUArchState *env;
1264 uint8_t buf[1024];
1265
1266 if (!has_cpu) {
1267 cpu_index = 0;
1268 }
1269
1270 for (env = first_cpu; env; env = env->next_cpu) {
1271 if (cpu_index == env->cpu_index) {
1272 break;
1273 }
1274 }
1275
1276 if (env == NULL) {
1277 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1278 "a CPU number");
1279 return;
1280 }
1281
1282 f = fopen(filename, "wb");
1283 if (!f) {
1284 error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1285 return;
1286 }
1287
1288 while (size != 0) {
1289 l = sizeof(buf);
1290 if (l > size)
1291 l = size;
1292 cpu_memory_rw_debug(env, addr, buf, l, 0);
1293 if (fwrite(buf, 1, l, f) != l) {
1294 error_set(errp, QERR_IO_ERROR);
1295 goto exit;
1296 }
1297 addr += l;
1298 size -= l;
1299 }
1300
1301 exit:
1302 fclose(f);
1303 }
1304
1305 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1306 Error **errp)
1307 {
1308 FILE *f;
1309 uint32_t l;
1310 uint8_t buf[1024];
1311
1312 f = fopen(filename, "wb");
1313 if (!f) {
1314 error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1315 return;
1316 }
1317
1318 while (size != 0) {
1319 l = sizeof(buf);
1320 if (l > size)
1321 l = size;
1322 cpu_physical_memory_rw(addr, buf, l, 0);
1323 if (fwrite(buf, 1, l, f) != l) {
1324 error_set(errp, QERR_IO_ERROR);
1325 goto exit;
1326 }
1327 addr += l;
1328 size -= l;
1329 }
1330
1331 exit:
1332 fclose(f);
1333 }
1334
1335 void qmp_inject_nmi(Error **errp)
1336 {
1337 #if defined(TARGET_I386)
1338 CPUArchState *env;
1339
1340 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1341 if (!env->apic_state) {
1342 cpu_interrupt(env, CPU_INTERRUPT_NMI);
1343 } else {
1344 apic_deliver_nmi(env->apic_state);
1345 }
1346 }
1347 #else
1348 error_set(errp, QERR_UNSUPPORTED);
1349 #endif
1350 }