/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUArchState *next_cpu;

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu->stopped || !runstate_is_running()) {
        return true;
    }
    if (!cpu->halted || qemu_cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUArchState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        if (!cpu_thread_is_idle(ENV_GET_CPU(env))) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
/* Compensate for varying guest execution speed. */
static int64_t qemu_icount_bias;
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
static int64_t vm_clock_warp_start;
static int64_t qemu_icount;

typedef struct TimersState {
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;
} TimersState;

TimersState timers_state;

/* Return the virtual CPU time, based on the instruction counter. */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = qemu_icount;
    if (cpu) {
        CPUArchState *env = cpu->env_ptr;
        if (!can_do_io(env)) {
            fprintf(stderr, "Bad clock read\n");
        }
        icount -= (env->icount_decr.u16.low + env->icount_extra);
    }
    return qemu_icount_bias + (icount << icount_time_shift);
}
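
/* For illustration: with icount_time_shift == 3 (the starting point that
 * configure_icount() below picks for "auto" mode), each completed guest
 * instruction advances the virtual clock by 1 << 3 = 8 ns, i.e. roughly
 * 125 MIPS; MAX_ICOUNT_SHIFT == 10 caps the cost at 1024 ns per
 * instruction, which is the 1 MIPS floor mentioned above. */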

/* return the host CPU cycle counter and handle stop/restart */
int64_t cpu_get_ticks(void)
{
    if (use_icount) {
        return cpu_get_icount();
    }
    if (!timers_state.cpu_ticks_enabled) {
        return timers_state.cpu_ticks_offset;
    } else {
        int64_t ticks;
        ticks = cpu_get_real_ticks();
        if (timers_state.cpu_ticks_prev > ticks) {
            /* Note: non-increasing ticks may happen if the host uses
               software suspend */
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        }
        timers_state.cpu_ticks_prev = ticks;
        return ticks + timers_state.cpu_ticks_offset;
    }
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    if (!timers_state.cpu_ticks_enabled) {
        return timers_state.cpu_clock_offset;
    } else {
        ti = get_clock();
        return ti + timers_state.cpu_clock_offset;
    }
}

/* enable cpu_get_ticks() */
void cpu_enable_ticks(void)
{
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
   cpu_get_ticks() after that. */
void cpu_disable_ticks(void)
{
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset = cpu_get_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock();
        timers_state.cpu_ticks_enabled = 0;
    }
}
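
/* Note on the offset bookkeeping above: while ticks are disabled,
 * cpu_ticks_offset holds the frozen counter value and is returned as is.
 * Re-enabling subtracts the current host counter from that offset, so the
 * next cpu_get_ticks() result (host counter + offset) resumes exactly at
 * the frozen value and stopped time never leaks into the guest's view. */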

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;
    static int64_t last_delta;
    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }
    cur_time = cpu_get_clock();
    cur_icount = qemu_get_clock_ns(vm_clock);
    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
}

static void icount_adjust_rt(void *opaque)
{
    qemu_mod_timer(icount_rt_timer,
                   qemu_get_clock_ms(rt_clock) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    qemu_mod_timer(icount_vm_timer,
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

static void icount_warp_rt(void *opaque)
{
    if (vm_clock_warp_start == -1) {
        return;
    }

    if (runstate_is_running()) {
        int64_t clock = qemu_get_clock_ns(rt_clock);
        int64_t warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 1) {
            qemu_icount_bias += warp_delta;
        } else {
            /*
             * In adaptive mode, do not let the vm_clock run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock();
            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
            int64_t delta = cur_time - cur_icount;
            qemu_icount_bias += MIN(warp_delta, delta);
        }
        if (qemu_clock_expired(vm_clock)) {
            qemu_notify_event();
        }
    }
    vm_clock_warp_start = -1;
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_get_clock_ns(vm_clock);
    assert(qtest_enabled());
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline(vm_clock);
        int64_t warp = MIN(dest - clock, deadline);
        qemu_icount_bias += warp;
        qemu_run_timers(vm_clock);
        clock = qemu_get_clock_ns(vm_clock);
    }
    qemu_notify_event();
}
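
/* qtest_clock_warp() deliberately advances the clock in deadline-sized
 * steps rather than jumping straight to 'dest': each iteration moves the
 * bias only as far as the next pending vm_clock timer and then runs it,
 * so timers scheduled between 'clock' and 'dest' fire in order. */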

void qemu_clock_warp(QEMUClock *clock)
{
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks. But a clock argument removes the
     * need for if statements all over the place.
     */
    if (clock != vm_clock || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the vm_clock timer now. This
     * ensures that the deadline for the timer is computed correctly below.
     * This also makes sure that the insn counter is synchronized before the
     * CPU starts running, in case the CPU is woken by an event other than
     * the earliest vm_clock timer.
     */
    icount_warp_rt(NULL);
    if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
        qemu_del_timer(icount_warp_timer);
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
    deadline = qemu_clock_deadline(vm_clock);
    if (deadline > 0) {
        /*
         * Ensure the vm_clock proceeds even when the virtual CPU goes to
         * sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * vm_clock.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending vm_clock timer; rather
         * time could just advance to the next vm_clock event. Instead, we
         * do stop VCPUs and only advance vm_clock after some "real" time
         * (related to the time left until the next event) has passed. This
         * rt_clock timer will do this. This keeps the warps from being too
         * visible externally---for example, you will not be sending network
         * packets continuously instead of every 100ms.
         */
        qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
    } else {
        qemu_notify_event();
    }
}

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    }
};

void configure_icount(const char *option)
{
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    if (!option) {
        return;
    }

    icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        icount_time_shift = strtol(option, NULL, 0);
        use_icount = 1;
        return;
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
    qemu_mod_timer(icount_rt_timer,
                   qemu_get_clock_ms(rt_clock) + 1000);
    icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
    qemu_mod_timer(icount_vm_timer,
                   qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
}
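
/* Usage note: this function receives the argument of QEMU's -icount
 * command-line option. "-icount N" fixes the shift (one guest insn ==
 * 2^N ns of virtual time, use_icount == 1), while "-icount auto" starts
 * at shift 3 and lets the two adjustment timers set up above retune it
 * at runtime (use_icount == 2). */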

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUArchState *env;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        cpu = ENV_GET_CPU(env);
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUArchState *env;

    for (env = first_cpu; env; env = env->next_cpu) {
        cpu_synchronize_state(ENV_GET_CPU(env));
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUArchState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
        cpu_synchronize_post_reset(ENV_GET_CPU(cpu));
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUArchState *cpu;

    for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
        cpu_synchronize_post_init(ENV_GET_CPU(cpu));
    }
}

bool cpu_is_stopped(CPUState *cpu)
{
    return !runstate_is_running() || cpu->stopped;
}

static void do_vm_stop(RunState state)
{
    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        bdrv_drain_all();
        bdrv_flush_all();
        monitor_protocol_event(QEVENT_STOP, NULL);
    }
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu->stopped || !runstate_is_running()) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

static void cpu_signal(int sig)
{
    if (current_cpu) {
        cpu_exit(current_cpu);
    }
    exit_request = 1;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static bool iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}
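
/* run_on_cpu() is the synchronous cross-thread call primitive: the work
 * item can live on the caller's stack because the caller blocks on
 * qemu_work_cond until flush_queued_work() on the target vCPU thread has
 * marked it done. current_cpu is saved and restored around the wait so
 * that the caller's notion of "its" CPU survives dropping the global
 * mutex. */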

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(void)
{
    CPUArchState *env;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle. */
        qemu_clock_warp(vm_clock);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        qemu_wait_io_event_common(ENV_GET_CPU(env));
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static void tcg_exec_all(void);

static void tcg_signal_cpu_creation(CPUState *cpu, void *data)
{
    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
}

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    CPUArchState *env;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_for_each_cpu(tcg_signal_cpu_creation, NULL);
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (ENV_GET_CPU(first_cpu)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        for (env = first_cpu; env != NULL; env = env->next_cpu) {
            qemu_wait_io_event_common(ENV_GET_CPU(env));
        }
    }

    while (1) {
        tcg_exec_all();
        if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
            qemu_notify_event();
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }
}

void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
#else
    abort();
#endif
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

static bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(ENV_GET_CPU(first_cpu));
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}

void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUArchState *penv = first_cpu;

    while (penv) {
        CPUState *pcpu = ENV_GET_CPU(penv);
        if (!pcpu->stopped) {
            return 0;
        }
        penv = penv->next_cpu;
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUArchState *penv = first_cpu;

    qemu_clock_enable(vm_clock, false);
    while (penv) {
        CPUState *pcpu = ENV_GET_CPU(penv);
        pcpu->stop = true;
        qemu_cpu_kick(pcpu);
        penv = penv->next_cpu;
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            penv = first_cpu;
            while (penv) {
                CPUState *pcpu = ENV_GET_CPU(penv);
                pcpu->stop = false;
                pcpu->stopped = true;
                penv = penv->next_cpu;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        penv = first_cpu;
        while (penv) {
            qemu_cpu_kick(ENV_GET_CPU(penv));
            penv = penv->next_cpu;
        }
    }
}
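
/* Pausing is asynchronous: a vCPU notices its stop flag in
 * qemu_wait_io_event_common(), flips it to stopped and signals
 * qemu_pause_cond. The re-kick inside the wait loop above guards
 * against a vCPU going back to sleep before it has seen the request. */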

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUArchState *penv = first_cpu;

    qemu_clock_enable(vm_clock, true);
    while (penv) {
        CPUState *pcpu = ENV_GET_CPU(penv);
        cpu_resume(pcpu);
        penv = penv->next_cpu;
    }
}

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
                           QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}
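
/* Design note: with TCG, all vCPUs share the single host thread created
 * for the first vCPU above (later vCPUs simply reuse it), and
 * tcg_exec_all() round-robins between them. KVM and the dummy backend,
 * by contrast, give each vCPU its own thread; see the functions below. */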

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }
}

void vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return;
    }
    do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
void vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        vm_stop(state);
    } else {
        runstate_set(state);
    }
}

static int tcg_cpu_exec(CPUArchState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int decr;
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;
        count = qemu_icount_round(qemu_clock_deadline(vm_clock));
        qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        env->icount_decr.u16.low = decr;
        env->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }
    return ret;
}
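
/* Worked example of the instruction budget split above: if the rounded
 * deadline is 0x12345 instructions, the 16-bit decrementer gets 0xffff
 * and icount_extra the remaining 0x2346; cpu_exec() refills the
 * decrementer from icount_extra as it drains, so execution stops within
 * one decrementer's worth of the next vm_clock deadline. */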

static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to the vm_clock. */
    qemu_clock_warp(vm_clock);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
        CPUArchState *env = next_cpu;
        CPUState *cpu = ENV_GET_CPU(env);

        qemu_clock_enable(vm_clock,
                          (env->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}

void set_numa_modes(void)
{
    CPUArchState *env;
    CPUState *cpu;
    int i;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        cpu = ENV_GET_CPU(env);
        for (i = 0; i < nb_numa_nodes; i++) {
            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
                cpu->numa_node = i;
            }
        }
    }
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUArchState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        CPUState *cpu = ENV_GET_CPU(env);
        CpuInfoList *info;

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (env == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
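
/* An illustrative QMP exchange served by the function above (values
 * invented for the example; the extra program-counter fields depend on
 * the target, x86 shown):
 *   -> { "execute": "query-cpus" }
 *   <- { "return": [ { "CPU": 0, "current": true, "halted": false,
 *                      "pc": 1048576, "thread_id": 3134 } ] }
 */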

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUArchState *env;
    CPUState *cpu;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }
    env = cpu->env_ptr;

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_memory_rw_debug(env, addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
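
/* qmp_memsave() above and qmp_pmemsave() below share the same chunking
 * pattern: the region is copied through a 1 KiB stack buffer, one
 * read-then-fwrite iteration per KiB. The difference is the address
 * space: memsave reads through the selected vCPU's virtual addresses
 * (cpu_memory_rw_debug), pmemsave through guest-physical memory. */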

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_rw(addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUArchState *env;

    for (env = first_cpu; env != NULL; env = env->next_cpu) {
        if (!env->apic_state) {
            cpu_interrupt(CPU(x86_env_get_cpu(env)), CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(env->apic_state);
        }
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}