]> git.proxmox.com Git - mirror_qemu.git/blame - cpus.c
qtest: IRQ interception infrastructure
[mirror_qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
26#include "config-host.h"
27
28#include "monitor.h"
29#include "sysemu.h"
30#include "gdbstub.h"
31#include "dma.h"
32#include "kvm.h"
de0b36b6 33#include "qmp-commands.h"
296af7c9 34
96284e89 35#include "qemu-thread.h"
296af7c9 36#include "cpus.h"
44a9b356 37#include "main-loop.h"
0ff0fc19
JK
38
39#ifndef _WIN32
a8486bc9 40#include "compatfd.h"
0ff0fc19 41#endif
296af7c9 42
6d9cb73c
JK
43#ifdef CONFIG_LINUX
44
45#include <sys/prctl.h>
46
c0532a76
MT
47#ifndef PR_MCE_KILL
48#define PR_MCE_KILL 33
49#endif
50
6d9cb73c
JK
51#ifndef PR_MCE_KILL_SET
52#define PR_MCE_KILL_SET 1
53#endif
54
55#ifndef PR_MCE_KILL_EARLY
56#define PR_MCE_KILL_EARLY 1
57#endif
58
59#endif /* CONFIG_LINUX */
60
9349b4f9 61static CPUArchState *next_cpu;
296af7c9 62
946fb27c
PB
63/***********************************************************/
64/* guest cycle counter */
65
66/* Conversion factor from emulated instructions to virtual clock ticks. */
67static int icount_time_shift;
68/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
69#define MAX_ICOUNT_SHIFT 10
70/* Compensate for varying guest execution speed. */
71static int64_t qemu_icount_bias;
72static QEMUTimer *icount_rt_timer;
73static QEMUTimer *icount_vm_timer;
74static QEMUTimer *icount_warp_timer;
75static int64_t vm_clock_warp_start;
76static int64_t qemu_icount;
77
/* Bookkeeping shared by cpu_get_ticks(), cpu_get_clock() and friends. */
typedef struct TimersState {
    /* Last raw host tick value observed by cpu_get_ticks(). */
    int64_t cpu_ticks_prev;
    /* Offset added to the raw host tick counter. */
    int64_t cpu_ticks_offset;
    /* Offset added to get_clock(). */
    int64_t cpu_clock_offset;
    /* Non-zero while the tick/clock sources are running. */
    int32_t cpu_ticks_enabled;
    /* Listed in vmstate_timers; not otherwise touched in the visible code. */
    int64_t dummy;
} TimersState;
85
86TimersState timers_state;
87
88/* Return the virtual CPU time, based on the instruction counter. */
89int64_t cpu_get_icount(void)
90{
91 int64_t icount;
9349b4f9 92 CPUArchState *env = cpu_single_env;
946fb27c
PB
93
94 icount = qemu_icount;
95 if (env) {
96 if (!can_do_io(env)) {
97 fprintf(stderr, "Bad clock read\n");
98 }
99 icount -= (env->icount_decr.u16.low + env->icount_extra);
100 }
101 return qemu_icount_bias + (icount << icount_time_shift);
102}
103
104/* return the host CPU cycle counter and handle stop/restart */
105int64_t cpu_get_ticks(void)
106{
107 if (use_icount) {
108 return cpu_get_icount();
109 }
110 if (!timers_state.cpu_ticks_enabled) {
111 return timers_state.cpu_ticks_offset;
112 } else {
113 int64_t ticks;
114 ticks = cpu_get_real_ticks();
115 if (timers_state.cpu_ticks_prev > ticks) {
116 /* Note: non increasing ticks may happen if the host uses
117 software suspend */
118 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
119 }
120 timers_state.cpu_ticks_prev = ticks;
121 return ticks + timers_state.cpu_ticks_offset;
122 }
123}
124
125/* return the host CPU monotonic timer and handle stop/restart */
126int64_t cpu_get_clock(void)
127{
128 int64_t ti;
129 if (!timers_state.cpu_ticks_enabled) {
130 return timers_state.cpu_clock_offset;
131 } else {
132 ti = get_clock();
133 return ti + timers_state.cpu_clock_offset;
134 }
135}
136
137/* enable cpu_get_ticks() */
138void cpu_enable_ticks(void)
139{
140 if (!timers_state.cpu_ticks_enabled) {
141 timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
142 timers_state.cpu_clock_offset -= get_clock();
143 timers_state.cpu_ticks_enabled = 1;
144 }
145}
146
147/* disable cpu_get_ticks() : the clock is stopped. You must not call
148 cpu_get_ticks() after that. */
149void cpu_disable_ticks(void)
150{
151 if (timers_state.cpu_ticks_enabled) {
152 timers_state.cpu_ticks_offset = cpu_get_ticks();
153 timers_state.cpu_clock_offset = cpu_get_clock();
154 timers_state.cpu_ticks_enabled = 0;
155 }
156}
157
158/* Correlation between real and virtual time is always going to be
159 fairly approximate, so ignore small variation.
160 When the guest is idle real and virtual time will be aligned in
161 the IO wait loop. */
162#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
163
164static void icount_adjust(void)
165{
166 int64_t cur_time;
167 int64_t cur_icount;
168 int64_t delta;
169 static int64_t last_delta;
170 /* If the VM is not running, then do nothing. */
171 if (!runstate_is_running()) {
172 return;
173 }
174 cur_time = cpu_get_clock();
175 cur_icount = qemu_get_clock_ns(vm_clock);
176 delta = cur_icount - cur_time;
177 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
178 if (delta > 0
179 && last_delta + ICOUNT_WOBBLE < delta * 2
180 && icount_time_shift > 0) {
181 /* The guest is getting too far ahead. Slow time down. */
182 icount_time_shift--;
183 }
184 if (delta < 0
185 && last_delta - ICOUNT_WOBBLE > delta * 2
186 && icount_time_shift < MAX_ICOUNT_SHIFT) {
187 /* The guest is getting too far behind. Speed time up. */
188 icount_time_shift++;
189 }
190 last_delta = delta;
191 qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
192}
193
194static void icount_adjust_rt(void *opaque)
195{
196 qemu_mod_timer(icount_rt_timer,
197 qemu_get_clock_ms(rt_clock) + 1000);
198 icount_adjust();
199}
200
201static void icount_adjust_vm(void *opaque)
202{
203 qemu_mod_timer(icount_vm_timer,
204 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
205 icount_adjust();
206}
207
208static int64_t qemu_icount_round(int64_t count)
209{
210 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
211}
212
213static void icount_warp_rt(void *opaque)
214{
215 if (vm_clock_warp_start == -1) {
216 return;
217 }
218
219 if (runstate_is_running()) {
220 int64_t clock = qemu_get_clock_ns(rt_clock);
221 int64_t warp_delta = clock - vm_clock_warp_start;
222 if (use_icount == 1) {
223 qemu_icount_bias += warp_delta;
224 } else {
225 /*
226 * In adaptive mode, do not let the vm_clock run too
227 * far ahead of real time.
228 */
229 int64_t cur_time = cpu_get_clock();
230 int64_t cur_icount = qemu_get_clock_ns(vm_clock);
231 int64_t delta = cur_time - cur_icount;
232 qemu_icount_bias += MIN(warp_delta, delta);
233 }
234 if (qemu_clock_expired(vm_clock)) {
235 qemu_notify_event();
236 }
237 }
238 vm_clock_warp_start = -1;
239}
240
241void qemu_clock_warp(QEMUClock *clock)
242{
243 int64_t deadline;
244
245 /*
246 * There are too many global variables to make the "warp" behavior
247 * applicable to other clocks. But a clock argument removes the
248 * need for if statements all over the place.
249 */
250 if (clock != vm_clock || !use_icount) {
251 return;
252 }
253
254 /*
255 * If the CPUs have been sleeping, advance the vm_clock timer now. This
256 * ensures that the deadline for the timer is computed correctly below.
257 * This also makes sure that the insn counter is synchronized before the
258 * CPU starts running, in case the CPU is woken by an event other than
259 * the earliest vm_clock timer.
260 */
261 icount_warp_rt(NULL);
262 if (!all_cpu_threads_idle() || !qemu_clock_has_timers(vm_clock)) {
263 qemu_del_timer(icount_warp_timer);
264 return;
265 }
266
267 vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
268 deadline = qemu_clock_deadline(vm_clock);
269 if (deadline > 0) {
270 /*
271 * Ensure the vm_clock proceeds even when the virtual CPU goes to
272 * sleep. Otherwise, the CPU might be waiting for a future timer
273 * interrupt to wake it up, but the interrupt never comes because
274 * the vCPU isn't running any insns and thus doesn't advance the
275 * vm_clock.
276 *
277 * An extreme solution for this problem would be to never let VCPUs
278 * sleep in icount mode if there is a pending vm_clock timer; rather
279 * time could just advance to the next vm_clock event. Instead, we
280 * do stop VCPUs and only advance vm_clock after some "real" time,
281 * (related to the time left until the next event) has passed. This
282 * rt_clock timer will do this. This avoids that the warps are too
283 * visible externally---for example, you will not be sending network
07f35073 284 * packets continuously instead of every 100ms.
946fb27c
PB
285 */
286 qemu_mod_timer(icount_warp_timer, vm_clock_warp_start + deadline);
287 } else {
288 qemu_notify_event();
289 }
290}
291
292static const VMStateDescription vmstate_timers = {
293 .name = "timer",
294 .version_id = 2,
295 .minimum_version_id = 1,
296 .minimum_version_id_old = 1,
297 .fields = (VMStateField[]) {
298 VMSTATE_INT64(cpu_ticks_offset, TimersState),
299 VMSTATE_INT64(dummy, TimersState),
300 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
301 VMSTATE_END_OF_LIST()
302 }
303};
304
305void configure_icount(const char *option)
306{
307 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
308 if (!option) {
309 return;
310 }
311
312 icount_warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
313 if (strcmp(option, "auto") != 0) {
314 icount_time_shift = strtol(option, NULL, 0);
315 use_icount = 1;
316 return;
317 }
318
319 use_icount = 2;
320
321 /* 125MIPS seems a reasonable initial guess at the guest speed.
322 It will be corrected fairly quickly anyway. */
323 icount_time_shift = 3;
324
325 /* Have both realtime and virtual time triggers for speed adjustment.
326 The realtime trigger catches emulated time passing too slowly,
327 the virtual time trigger catches emulated time passing too fast.
328 Realtime triggers occur even when idle, so use them less frequently
329 than VM triggers. */
330 icount_rt_timer = qemu_new_timer_ms(rt_clock, icount_adjust_rt, NULL);
331 qemu_mod_timer(icount_rt_timer,
332 qemu_get_clock_ms(rt_clock) + 1000);
333 icount_vm_timer = qemu_new_timer_ns(vm_clock, icount_adjust_vm, NULL);
334 qemu_mod_timer(icount_vm_timer,
335 qemu_get_clock_ns(vm_clock) + get_ticks_per_sec() / 10);
336}
337
296af7c9
BS
338/***********************************************************/
339void hw_error(const char *fmt, ...)
340{
341 va_list ap;
9349b4f9 342 CPUArchState *env;
296af7c9
BS
343
344 va_start(ap, fmt);
345 fprintf(stderr, "qemu: hardware error: ");
346 vfprintf(stderr, fmt, ap);
347 fprintf(stderr, "\n");
348 for(env = first_cpu; env != NULL; env = env->next_cpu) {
349 fprintf(stderr, "CPU #%d:\n", env->cpu_index);
350#ifdef TARGET_I386
351 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
352#else
353 cpu_dump_state(env, stderr, fprintf, 0);
354#endif
355 }
356 va_end(ap);
357 abort();
358}
359
360void cpu_synchronize_all_states(void)
361{
9349b4f9 362 CPUArchState *cpu;
296af7c9
BS
363
364 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
365 cpu_synchronize_state(cpu);
366 }
367}
368
369void cpu_synchronize_all_post_reset(void)
370{
9349b4f9 371 CPUArchState *cpu;
296af7c9
BS
372
373 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
374 cpu_synchronize_post_reset(cpu);
375 }
376}
377
378void cpu_synchronize_all_post_init(void)
379{
9349b4f9 380 CPUArchState *cpu;
296af7c9
BS
381
382 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
383 cpu_synchronize_post_init(cpu);
384 }
385}
386
9349b4f9 387int cpu_is_stopped(CPUArchState *env)
3ae9501c 388{
1354869c 389 return !runstate_is_running() || env->stopped;
3ae9501c
MT
390}
391
1dfb4dd9 392static void do_vm_stop(RunState state)
296af7c9 393{
1354869c 394 if (runstate_is_running()) {
296af7c9 395 cpu_disable_ticks();
296af7c9 396 pause_all_vcpus();
f5bbfba1 397 runstate_set(state);
1dfb4dd9 398 vm_state_notify(0, state);
922453bc 399 bdrv_drain_all();
55df6f33 400 bdrv_flush_all();
296af7c9
BS
401 monitor_protocol_event(QEVENT_STOP, NULL);
402 }
403}
404
9349b4f9 405static int cpu_can_run(CPUArchState *env)
296af7c9 406{
0ab07c62 407 if (env->stop) {
296af7c9 408 return 0;
0ab07c62 409 }
1354869c 410 if (env->stopped || !runstate_is_running()) {
296af7c9 411 return 0;
0ab07c62 412 }
296af7c9
BS
413 return 1;
414}
415
9349b4f9 416static bool cpu_thread_is_idle(CPUArchState *env)
296af7c9 417{
16400322
JK
418 if (env->stop || env->queued_work_first) {
419 return false;
420 }
1354869c 421 if (env->stopped || !runstate_is_running()) {
16400322
JK
422 return true;
423 }
f2c1cc81
JK
424 if (!env->halted || qemu_cpu_has_work(env) ||
425 (kvm_enabled() && kvm_irqchip_in_kernel())) {
16400322
JK
426 return false;
427 }
428 return true;
296af7c9
BS
429}
430
ab33fcda 431bool all_cpu_threads_idle(void)
296af7c9 432{
9349b4f9 433 CPUArchState *env;
296af7c9 434
16400322
JK
435 for (env = first_cpu; env != NULL; env = env->next_cpu) {
436 if (!cpu_thread_is_idle(env)) {
437 return false;
438 }
439 }
440 return true;
296af7c9
BS
441}
442
9349b4f9 443static void cpu_handle_guest_debug(CPUArchState *env)
83f338f7 444{
3c638d06 445 gdb_set_stop_cpu(env);
8cf71710 446 qemu_system_debug_request();
83f338f7 447 env->stopped = 1;
3c638d06
JK
448}
449
714bd040
PB
450static void cpu_signal(int sig)
451{
452 if (cpu_single_env) {
453 cpu_exit(cpu_single_env);
454 }
455 exit_request = 1;
456}
714bd040 457
6d9cb73c
JK
458#ifdef CONFIG_LINUX
459static void sigbus_reraise(void)
460{
461 sigset_t set;
462 struct sigaction action;
463
464 memset(&action, 0, sizeof(action));
465 action.sa_handler = SIG_DFL;
466 if (!sigaction(SIGBUS, &action, NULL)) {
467 raise(SIGBUS);
468 sigemptyset(&set);
469 sigaddset(&set, SIGBUS);
470 sigprocmask(SIG_UNBLOCK, &set, NULL);
471 }
472 perror("Failed to re-raise SIGBUS!\n");
473 abort();
474}
475
476static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
477 void *ctx)
478{
479 if (kvm_on_sigbus(siginfo->ssi_code,
480 (void *)(intptr_t)siginfo->ssi_addr)) {
481 sigbus_reraise();
482 }
483}
484
485static void qemu_init_sigbus(void)
486{
487 struct sigaction action;
488
489 memset(&action, 0, sizeof(action));
490 action.sa_flags = SA_SIGINFO;
491 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
492 sigaction(SIGBUS, &action, NULL);
493
494 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
495}
496
9349b4f9 497static void qemu_kvm_eat_signals(CPUArchState *env)
1ab3c6c0
JK
498{
499 struct timespec ts = { 0, 0 };
500 siginfo_t siginfo;
501 sigset_t waitset;
502 sigset_t chkset;
503 int r;
504
505 sigemptyset(&waitset);
506 sigaddset(&waitset, SIG_IPI);
507 sigaddset(&waitset, SIGBUS);
508
509 do {
510 r = sigtimedwait(&waitset, &siginfo, &ts);
511 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
512 perror("sigtimedwait");
513 exit(1);
514 }
515
516 switch (r) {
517 case SIGBUS:
518 if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
519 sigbus_reraise();
520 }
521 break;
522 default:
523 break;
524 }
525
526 r = sigpending(&chkset);
527 if (r == -1) {
528 perror("sigpending");
529 exit(1);
530 }
531 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
532}
533
6d9cb73c
JK
534#else /* !CONFIG_LINUX */
535
/* Build without CONFIG_LINUX: no SIGBUS setup is performed. */
static void qemu_init_sigbus(void)
{
    /* nothing to do */
}
1ab3c6c0 539
9349b4f9 540static void qemu_kvm_eat_signals(CPUArchState *env)
1ab3c6c0
JK
541{
542}
6d9cb73c
JK
543#endif /* !CONFIG_LINUX */
544
296af7c9 545#ifndef _WIN32
55f8d6ac
JK
/* Empty handler; installed for SIG_IPI by qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
    /* intentionally empty */
}
55f8d6ac 549
9349b4f9 550static void qemu_kvm_init_cpu_signals(CPUArchState *env)
714bd040
PB
551{
552 int r;
553 sigset_t set;
554 struct sigaction sigact;
555
556 memset(&sigact, 0, sizeof(sigact));
557 sigact.sa_handler = dummy_signal;
558 sigaction(SIG_IPI, &sigact, NULL);
559
714bd040
PB
560 pthread_sigmask(SIG_BLOCK, NULL, &set);
561 sigdelset(&set, SIG_IPI);
714bd040
PB
562 sigdelset(&set, SIGBUS);
563 r = kvm_set_signal_mask(env, &set);
564 if (r) {
565 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
566 exit(1);
567 }
568}
569
570static void qemu_tcg_init_cpu_signals(void)
571{
714bd040
PB
572 sigset_t set;
573 struct sigaction sigact;
574
575 memset(&sigact, 0, sizeof(sigact));
576 sigact.sa_handler = cpu_signal;
577 sigaction(SIG_IPI, &sigact, NULL);
578
579 sigemptyset(&set);
580 sigaddset(&set, SIG_IPI);
581 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
714bd040
PB
582}
583
55f8d6ac 584#else /* _WIN32 */
9349b4f9 585static void qemu_kvm_init_cpu_signals(CPUArchState *env)
ff48eb5f 586{
714bd040
PB
587 abort();
588}
ff48eb5f 589
714bd040
PB
/* Windows build: no signal setup is performed for the TCG thread. */
static void qemu_tcg_init_cpu_signals(void)
{
    /* nothing to do */
}
714bd040 593#endif /* _WIN32 */
ff48eb5f 594
296af7c9 595QemuMutex qemu_global_mutex;
46daff13
PB
596static QemuCond qemu_io_proceeded_cond;
597static bool iothread_requesting_mutex;
296af7c9
BS
598
599static QemuThread io_thread;
600
601static QemuThread *tcg_cpu_thread;
602static QemuCond *tcg_halt_cond;
603
296af7c9
BS
604/* cpu creation */
605static QemuCond qemu_cpu_cond;
606/* system init */
296af7c9 607static QemuCond qemu_pause_cond;
e82bcec2 608static QemuCond qemu_work_cond;
296af7c9 609
d3b12f5d 610void qemu_init_cpu_loop(void)
296af7c9 611{
6d9cb73c 612 qemu_init_sigbus();
ed94592b 613 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
614 qemu_cond_init(&qemu_pause_cond);
615 qemu_cond_init(&qemu_work_cond);
46daff13 616 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 617 qemu_mutex_init(&qemu_global_mutex);
296af7c9 618
b7680cb6 619 qemu_thread_get_self(&io_thread);
296af7c9
BS
620}
621
9349b4f9 622void run_on_cpu(CPUArchState *env, void (*func)(void *data), void *data)
e82bcec2
MT
623{
624 struct qemu_work_item wi;
625
b7680cb6 626 if (qemu_cpu_is_self(env)) {
e82bcec2
MT
627 func(data);
628 return;
629 }
630
631 wi.func = func;
632 wi.data = data;
0ab07c62 633 if (!env->queued_work_first) {
e82bcec2 634 env->queued_work_first = &wi;
0ab07c62 635 } else {
e82bcec2 636 env->queued_work_last->next = &wi;
0ab07c62 637 }
e82bcec2
MT
638 env->queued_work_last = &wi;
639 wi.next = NULL;
640 wi.done = false;
641
642 qemu_cpu_kick(env);
643 while (!wi.done) {
9349b4f9 644 CPUArchState *self_env = cpu_single_env;
e82bcec2
MT
645
646 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
647 cpu_single_env = self_env;
648 }
649}
650
9349b4f9 651static void flush_queued_work(CPUArchState *env)
e82bcec2
MT
652{
653 struct qemu_work_item *wi;
654
0ab07c62 655 if (!env->queued_work_first) {
e82bcec2 656 return;
0ab07c62 657 }
e82bcec2
MT
658
659 while ((wi = env->queued_work_first)) {
660 env->queued_work_first = wi->next;
661 wi->func(wi->data);
662 wi->done = true;
663 }
664 env->queued_work_last = NULL;
665 qemu_cond_broadcast(&qemu_work_cond);
666}
667
9349b4f9 668static void qemu_wait_io_event_common(CPUArchState *env)
296af7c9
BS
669{
670 if (env->stop) {
671 env->stop = 0;
672 env->stopped = 1;
673 qemu_cond_signal(&qemu_pause_cond);
674 }
e82bcec2 675 flush_queued_work(env);
aa2c364b 676 env->thread_kicked = false;
296af7c9
BS
677}
678
6cabe1f3 679static void qemu_tcg_wait_io_event(void)
296af7c9 680{
9349b4f9 681 CPUArchState *env;
6cabe1f3 682
16400322 683 while (all_cpu_threads_idle()) {
ab33fcda
PB
684 /* Start accounting real time to the virtual clock if the CPUs
685 are idle. */
686 qemu_clock_warp(vm_clock);
9705fbb5 687 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
16400322 688 }
296af7c9 689
46daff13
PB
690 while (iothread_requesting_mutex) {
691 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
692 }
6cabe1f3
JK
693
694 for (env = first_cpu; env != NULL; env = env->next_cpu) {
695 qemu_wait_io_event_common(env);
696 }
296af7c9
BS
697}
698
9349b4f9 699static void qemu_kvm_wait_io_event(CPUArchState *env)
296af7c9 700{
16400322 701 while (cpu_thread_is_idle(env)) {
9705fbb5 702 qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
16400322 703 }
296af7c9 704
5db5bdac 705 qemu_kvm_eat_signals(env);
296af7c9
BS
706 qemu_wait_io_event_common(env);
707}
708
7e97cd88 709static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 710{
9349b4f9 711 CPUArchState *env = arg;
84b4915d 712 int r;
296af7c9 713
6164e6d6 714 qemu_mutex_lock(&qemu_global_mutex);
b7680cb6 715 qemu_thread_get_self(env->thread);
dc7a09cf 716 env->thread_id = qemu_get_thread_id();
e479c207 717 cpu_single_env = env;
296af7c9 718
84b4915d
JK
719 r = kvm_init_vcpu(env);
720 if (r < 0) {
721 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
722 exit(1);
723 }
296af7c9 724
55f8d6ac 725 qemu_kvm_init_cpu_signals(env);
296af7c9
BS
726
727 /* signal CPU creation */
296af7c9
BS
728 env->created = 1;
729 qemu_cond_signal(&qemu_cpu_cond);
730
296af7c9 731 while (1) {
0ab07c62 732 if (cpu_can_run(env)) {
6792a57b 733 r = kvm_cpu_exec(env);
83f338f7 734 if (r == EXCP_DEBUG) {
1009d2ed 735 cpu_handle_guest_debug(env);
83f338f7 736 }
0ab07c62 737 }
296af7c9
BS
738 qemu_kvm_wait_io_event(env);
739 }
740
741 return NULL;
742}
743
c7f0f3b1
AL
744static void *qemu_dummy_cpu_thread_fn(void *arg)
745{
746#ifdef _WIN32
747 fprintf(stderr, "qtest is not supported under Windows\n");
748 exit(1);
749#else
750 CPUArchState *env = arg;
751 sigset_t waitset;
752 int r;
753
754 qemu_mutex_lock_iothread();
755 qemu_thread_get_self(env->thread);
756 env->thread_id = qemu_get_thread_id();
757
758 sigemptyset(&waitset);
759 sigaddset(&waitset, SIG_IPI);
760
761 /* signal CPU creation */
762 env->created = 1;
763 qemu_cond_signal(&qemu_cpu_cond);
764
765 cpu_single_env = env;
766 while (1) {
767 cpu_single_env = NULL;
768 qemu_mutex_unlock_iothread();
769 do {
770 int sig;
771 r = sigwait(&waitset, &sig);
772 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
773 if (r == -1) {
774 perror("sigwait");
775 exit(1);
776 }
777 qemu_mutex_lock_iothread();
778 cpu_single_env = env;
779 qemu_wait_io_event_common(env);
780 }
781
782 return NULL;
783#endif
784}
785
bdb7ca67
JK
786static void tcg_exec_all(void);
787
7e97cd88 788static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 789{
9349b4f9 790 CPUArchState *env = arg;
296af7c9 791
55f8d6ac 792 qemu_tcg_init_cpu_signals();
b7680cb6 793 qemu_thread_get_self(env->thread);
296af7c9
BS
794
795 /* signal CPU creation */
796 qemu_mutex_lock(&qemu_global_mutex);
0ab07c62 797 for (env = first_cpu; env != NULL; env = env->next_cpu) {
dc7a09cf 798 env->thread_id = qemu_get_thread_id();
296af7c9 799 env->created = 1;
0ab07c62 800 }
296af7c9
BS
801 qemu_cond_signal(&qemu_cpu_cond);
802
fa7d1867
JK
803 /* wait for initial kick-off after machine start */
804 while (first_cpu->stopped) {
805 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
8e564b4e
JK
806
807 /* process any pending work */
808 for (env = first_cpu; env != NULL; env = env->next_cpu) {
809 qemu_wait_io_event_common(env);
810 }
0ab07c62 811 }
296af7c9
BS
812
813 while (1) {
bdb7ca67 814 tcg_exec_all();
946fb27c 815 if (use_icount && qemu_clock_deadline(vm_clock) <= 0) {
3b2319a3
PB
816 qemu_notify_event();
817 }
6cabe1f3 818 qemu_tcg_wait_io_event();
296af7c9
BS
819 }
820
821 return NULL;
822}
823
9349b4f9 824static void qemu_cpu_kick_thread(CPUArchState *env)
cc015e9a
PB
825{
826#ifndef _WIN32
827 int err;
828
829 err = pthread_kill(env->thread->thread, SIG_IPI);
830 if (err) {
831 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
832 exit(1);
833 }
834#else /* _WIN32 */
835 if (!qemu_cpu_is_self(env)) {
1ecf47bf 836 SuspendThread(env->hThread);
cc015e9a 837 cpu_signal(0);
1ecf47bf 838 ResumeThread(env->hThread);
cc015e9a
PB
839 }
840#endif
841}
842
296af7c9
BS
843void qemu_cpu_kick(void *_env)
844{
9349b4f9 845 CPUArchState *env = _env;
296af7c9 846
296af7c9 847 qemu_cond_broadcast(env->halt_cond);
c7f0f3b1 848 if (!tcg_enabled() && !env->thread_kicked) {
cc015e9a 849 qemu_cpu_kick_thread(env);
aa2c364b
JK
850 env->thread_kicked = true;
851 }
296af7c9
BS
852}
853
46d62fac 854void qemu_cpu_kick_self(void)
296af7c9 855{
b55c22c6 856#ifndef _WIN32
46d62fac 857 assert(cpu_single_env);
296af7c9 858
46d62fac 859 if (!cpu_single_env->thread_kicked) {
cc015e9a 860 qemu_cpu_kick_thread(cpu_single_env);
46d62fac 861 cpu_single_env->thread_kicked = true;
296af7c9 862 }
b55c22c6
PB
863#else
864 abort();
865#endif
296af7c9
BS
866}
867
b7680cb6 868int qemu_cpu_is_self(void *_env)
296af7c9 869{
9349b4f9 870 CPUArchState *env = _env;
a8486bc9 871
b7680cb6 872 return qemu_thread_is_self(env->thread);
296af7c9
BS
873}
874
296af7c9
BS
875void qemu_mutex_lock_iothread(void)
876{
c7f0f3b1 877 if (!tcg_enabled()) {
296af7c9 878 qemu_mutex_lock(&qemu_global_mutex);
1a28cac3 879 } else {
46daff13 880 iothread_requesting_mutex = true;
1a28cac3 881 if (qemu_mutex_trylock(&qemu_global_mutex)) {
cc015e9a 882 qemu_cpu_kick_thread(first_cpu);
1a28cac3
MT
883 qemu_mutex_lock(&qemu_global_mutex);
884 }
46daff13
PB
885 iothread_requesting_mutex = false;
886 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 887 }
296af7c9
BS
888}
889
890void qemu_mutex_unlock_iothread(void)
891{
892 qemu_mutex_unlock(&qemu_global_mutex);
893}
894
895static int all_vcpus_paused(void)
896{
9349b4f9 897 CPUArchState *penv = first_cpu;
296af7c9
BS
898
899 while (penv) {
0ab07c62 900 if (!penv->stopped) {
296af7c9 901 return 0;
0ab07c62 902 }
5207a5e0 903 penv = penv->next_cpu;
296af7c9
BS
904 }
905
906 return 1;
907}
908
909void pause_all_vcpus(void)
910{
9349b4f9 911 CPUArchState *penv = first_cpu;
296af7c9 912
a5c57d64 913 qemu_clock_enable(vm_clock, false);
296af7c9
BS
914 while (penv) {
915 penv->stop = 1;
296af7c9 916 qemu_cpu_kick(penv);
5207a5e0 917 penv = penv->next_cpu;
296af7c9
BS
918 }
919
d798e974
JK
920 if (!qemu_thread_is_self(&io_thread)) {
921 cpu_stop_current();
922 if (!kvm_enabled()) {
923 while (penv) {
924 penv->stop = 0;
925 penv->stopped = 1;
926 penv = penv->next_cpu;
927 }
928 return;
929 }
930 }
931
296af7c9 932 while (!all_vcpus_paused()) {
be7d6c57 933 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
296af7c9
BS
934 penv = first_cpu;
935 while (penv) {
1fbb22e5 936 qemu_cpu_kick(penv);
5207a5e0 937 penv = penv->next_cpu;
296af7c9
BS
938 }
939 }
940}
941
942void resume_all_vcpus(void)
943{
9349b4f9 944 CPUArchState *penv = first_cpu;
296af7c9 945
47113ab6 946 qemu_clock_enable(vm_clock, true);
296af7c9
BS
947 while (penv) {
948 penv->stop = 0;
949 penv->stopped = 0;
296af7c9 950 qemu_cpu_kick(penv);
5207a5e0 951 penv = penv->next_cpu;
296af7c9
BS
952 }
953}
954
7e97cd88 955static void qemu_tcg_init_vcpu(void *_env)
296af7c9 956{
9349b4f9 957 CPUArchState *env = _env;
0ab07c62 958
296af7c9
BS
959 /* share a single thread for all cpus with TCG */
960 if (!tcg_cpu_thread) {
7267c094
AL
961 env->thread = g_malloc0(sizeof(QemuThread));
962 env->halt_cond = g_malloc0(sizeof(QemuCond));
296af7c9 963 qemu_cond_init(env->halt_cond);
fa7d1867 964 tcg_halt_cond = env->halt_cond;
cf218714 965 qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env,
1ecf47bf
PB
966 QEMU_THREAD_JOINABLE);
967#ifdef _WIN32
968 env->hThread = qemu_thread_get_handle(env->thread);
969#endif
0ab07c62 970 while (env->created == 0) {
18a85728 971 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 972 }
296af7c9 973 tcg_cpu_thread = env->thread;
296af7c9
BS
974 } else {
975 env->thread = tcg_cpu_thread;
976 env->halt_cond = tcg_halt_cond;
977 }
978}
979
9349b4f9 980static void qemu_kvm_start_vcpu(CPUArchState *env)
296af7c9 981{
7267c094
AL
982 env->thread = g_malloc0(sizeof(QemuThread));
983 env->halt_cond = g_malloc0(sizeof(QemuCond));
296af7c9 984 qemu_cond_init(env->halt_cond);
cf218714 985 qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env,
1ecf47bf 986 QEMU_THREAD_JOINABLE);
0ab07c62 987 while (env->created == 0) {
18a85728 988 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 989 }
296af7c9
BS
990}
991
c7f0f3b1
AL
992static void qemu_dummy_start_vcpu(CPUArchState *env)
993{
994 env->thread = g_malloc0(sizeof(QemuThread));
995 env->halt_cond = g_malloc0(sizeof(QemuCond));
996 qemu_cond_init(env->halt_cond);
997 qemu_thread_create(env->thread, qemu_dummy_cpu_thread_fn, env,
998 QEMU_THREAD_JOINABLE);
999 while (env->created == 0) {
1000 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1001 }
1002}
1003
296af7c9
BS
1004void qemu_init_vcpu(void *_env)
1005{
9349b4f9 1006 CPUArchState *env = _env;
296af7c9
BS
1007
1008 env->nr_cores = smp_cores;
1009 env->nr_threads = smp_threads;
fa7d1867 1010 env->stopped = 1;
0ab07c62 1011 if (kvm_enabled()) {
7e97cd88 1012 qemu_kvm_start_vcpu(env);
c7f0f3b1 1013 } else if (tcg_enabled()) {
7e97cd88 1014 qemu_tcg_init_vcpu(env);
c7f0f3b1
AL
1015 } else {
1016 qemu_dummy_start_vcpu(env);
0ab07c62 1017 }
296af7c9
BS
1018}
1019
b4a3d965 1020void cpu_stop_current(void)
296af7c9 1021{
b4a3d965 1022 if (cpu_single_env) {
67bb172f 1023 cpu_single_env->stop = 0;
b4a3d965
JK
1024 cpu_single_env->stopped = 1;
1025 cpu_exit(cpu_single_env);
67bb172f 1026 qemu_cond_signal(&qemu_pause_cond);
b4a3d965 1027 }
296af7c9
BS
1028}
1029
1dfb4dd9 1030void vm_stop(RunState state)
296af7c9 1031{
b7680cb6 1032 if (!qemu_thread_is_self(&io_thread)) {
1dfb4dd9 1033 qemu_system_vmstop_request(state);
296af7c9
BS
1034 /*
1035 * FIXME: should not return to device code in case
1036 * vm_stop() has been requested.
1037 */
b4a3d965 1038 cpu_stop_current();
296af7c9
BS
1039 return;
1040 }
1dfb4dd9 1041 do_vm_stop(state);
296af7c9
BS
1042}
1043
8a9236f1
LC
1044/* does a state transition even if the VM is already stopped,
1045 current state is forgotten forever */
1046void vm_stop_force_state(RunState state)
1047{
1048 if (runstate_is_running()) {
1049 vm_stop(state);
1050 } else {
1051 runstate_set(state);
1052 }
1053}
1054
9349b4f9 1055static int tcg_cpu_exec(CPUArchState *env)
296af7c9
BS
1056{
1057 int ret;
1058#ifdef CONFIG_PROFILER
1059 int64_t ti;
1060#endif
1061
1062#ifdef CONFIG_PROFILER
1063 ti = profile_getclock();
1064#endif
1065 if (use_icount) {
1066 int64_t count;
1067 int decr;
1068 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
1069 env->icount_decr.u16.low = 0;
1070 env->icount_extra = 0;
946fb27c 1071 count = qemu_icount_round(qemu_clock_deadline(vm_clock));
296af7c9
BS
1072 qemu_icount += count;
1073 decr = (count > 0xffff) ? 0xffff : count;
1074 count -= decr;
1075 env->icount_decr.u16.low = decr;
1076 env->icount_extra = count;
1077 }
1078 ret = cpu_exec(env);
1079#ifdef CONFIG_PROFILER
1080 qemu_time += profile_getclock() - ti;
1081#endif
1082 if (use_icount) {
1083 /* Fold pending instructions back into the
1084 instruction counter, and clear the interrupt flag. */
1085 qemu_icount -= (env->icount_decr.u16.low
1086 + env->icount_extra);
1087 env->icount_decr.u32 = 0;
1088 env->icount_extra = 0;
1089 }
1090 return ret;
1091}
1092
bdb7ca67 1093static void tcg_exec_all(void)
296af7c9 1094{
9a36085b
JK
1095 int r;
1096
ab33fcda
PB
1097 /* Account partial waits to the vm_clock. */
1098 qemu_clock_warp(vm_clock);
1099
0ab07c62 1100 if (next_cpu == NULL) {
296af7c9 1101 next_cpu = first_cpu;
0ab07c62 1102 }
c629a4bc 1103 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
9349b4f9 1104 CPUArchState *env = next_cpu;
296af7c9
BS
1105
1106 qemu_clock_enable(vm_clock,
345f4426 1107 (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1108
3c638d06 1109 if (cpu_can_run(env)) {
bdb7ca67 1110 r = tcg_cpu_exec(env);
9a36085b 1111 if (r == EXCP_DEBUG) {
1009d2ed 1112 cpu_handle_guest_debug(env);
3c638d06
JK
1113 break;
1114 }
df646dfd 1115 } else if (env->stop || env->stopped) {
296af7c9
BS
1116 break;
1117 }
1118 }
c629a4bc 1119 exit_request = 0;
296af7c9
BS
1120}
1121
1122void set_numa_modes(void)
1123{
9349b4f9 1124 CPUArchState *env;
296af7c9
BS
1125 int i;
1126
1127 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1128 for (i = 0; i < nb_numa_nodes; i++) {
1129 if (node_cpumask[i] & (1 << env->cpu_index)) {
1130 env->numa_node = i;
1131 }
1132 }
1133 }
1134}
1135
1136void set_cpu_log(const char *optarg)
1137{
1138 int mask;
1139 const CPULogItem *item;
1140
1141 mask = cpu_str_to_log_mask(optarg);
1142 if (!mask) {
1143 printf("Log items (comma separated):\n");
1144 for (item = cpu_log_items; item->mask != 0; item++) {
1145 printf("%-10s %s\n", item->name, item->help);
1146 }
1147 exit(1);
1148 }
1149 cpu_set_log(mask);
1150}
29e922b6 1151
c235d738
MF
/*
 * Forward @optarg unchanged to cpu_set_log_filename().
 */
void set_cpu_log_filename(const char *optarg)
{
    const char *filename = optarg;

    cpu_set_log_filename(filename);
}
1156
9a78eead 1157void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1158{
1159 /* XXX: implement xxx_cpu_list for targets that still miss it */
1160#if defined(cpu_list_id)
1161 cpu_list_id(f, cpu_fprintf, optarg);
1162#elif defined(cpu_list)
1163 cpu_list(f, cpu_fprintf); /* deprecated */
1164#endif
1165}
de0b36b6
LC
1166
1167CpuInfoList *qmp_query_cpus(Error **errp)
1168{
1169 CpuInfoList *head = NULL, *cur_item = NULL;
9349b4f9 1170 CPUArchState *env;
de0b36b6
LC
1171
1172 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1173 CpuInfoList *info;
1174
1175 cpu_synchronize_state(env);
1176
1177 info = g_malloc0(sizeof(*info));
1178 info->value = g_malloc0(sizeof(*info->value));
1179 info->value->CPU = env->cpu_index;
1180 info->value->current = (env == first_cpu);
1181 info->value->halted = env->halted;
1182 info->value->thread_id = env->thread_id;
1183#if defined(TARGET_I386)
1184 info->value->has_pc = true;
1185 info->value->pc = env->eip + env->segs[R_CS].base;
1186#elif defined(TARGET_PPC)
1187 info->value->has_nip = true;
1188 info->value->nip = env->nip;
1189#elif defined(TARGET_SPARC)
1190 info->value->has_pc = true;
1191 info->value->pc = env->pc;
1192 info->value->has_npc = true;
1193 info->value->npc = env->npc;
1194#elif defined(TARGET_MIPS)
1195 info->value->has_PC = true;
1196 info->value->PC = env->active_tc.PC;
1197#endif
1198
1199 /* XXX: waiting for the qapi to support GSList */
1200 if (!cur_item) {
1201 head = cur_item = info;
1202 } else {
1203 cur_item->next = info;
1204 cur_item = info;
1205 }
1206 }
1207
1208 return head;
1209}
0cfd6a9a
LC
1210
1211void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1212 bool has_cpu, int64_t cpu_index, Error **errp)
1213{
1214 FILE *f;
1215 uint32_t l;
9349b4f9 1216 CPUArchState *env;
0cfd6a9a
LC
1217 uint8_t buf[1024];
1218
1219 if (!has_cpu) {
1220 cpu_index = 0;
1221 }
1222
1223 for (env = first_cpu; env; env = env->next_cpu) {
1224 if (cpu_index == env->cpu_index) {
1225 break;
1226 }
1227 }
1228
1229 if (env == NULL) {
1230 error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1231 "a CPU number");
1232 return;
1233 }
1234
1235 f = fopen(filename, "wb");
1236 if (!f) {
1237 error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1238 return;
1239 }
1240
1241 while (size != 0) {
1242 l = sizeof(buf);
1243 if (l > size)
1244 l = size;
1245 cpu_memory_rw_debug(env, addr, buf, l, 0);
1246 if (fwrite(buf, 1, l, f) != l) {
1247 error_set(errp, QERR_IO_ERROR);
1248 goto exit;
1249 }
1250 addr += l;
1251 size -= l;
1252 }
1253
1254exit:
1255 fclose(f);
1256}
6d3962bf
LC
1257
1258void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1259 Error **errp)
1260{
1261 FILE *f;
1262 uint32_t l;
1263 uint8_t buf[1024];
1264
1265 f = fopen(filename, "wb");
1266 if (!f) {
1267 error_set(errp, QERR_OPEN_FILE_FAILED, filename);
1268 return;
1269 }
1270
1271 while (size != 0) {
1272 l = sizeof(buf);
1273 if (l > size)
1274 l = size;
1275 cpu_physical_memory_rw(addr, buf, l, 0);
1276 if (fwrite(buf, 1, l, f) != l) {
1277 error_set(errp, QERR_IO_ERROR);
1278 goto exit;
1279 }
1280 addr += l;
1281 size -= l;
1282 }
1283
1284exit:
1285 fclose(f);
1286}
ab49ab5c
LC
1287
1288void qmp_inject_nmi(Error **errp)
1289{
1290#if defined(TARGET_I386)
9349b4f9 1291 CPUArchState *env;
ab49ab5c
LC
1292
1293 for (env = first_cpu; env != NULL; env = env->next_cpu) {
02c09195
JK
1294 if (!env->apic_state) {
1295 cpu_interrupt(env, CPU_INTERRUPT_NMI);
1296 } else {
1297 apic_deliver_nmi(env->apic_state);
1298 }
ab49ab5c
LC
1299 }
1300#else
1301 error_set(errp, QERR_UNSUPPORTED);
1302#endif
1303}