/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"

#ifndef _WIN32
#include "qemu/compatfd.h"
#endif

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static CPUState *next_cpu;

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || qemu_cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
/* Compensate for varying guest execution speed. */
static int64_t qemu_icount_bias;
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;
static int64_t vm_clock_warp_start;
static int64_t qemu_icount;

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read outside of the BQL, so protect it
     * with this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;
} TimersState;

static TimersState timers_state;

/* Return the virtual CPU time, based on the instruction counter. */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    CPUState *cpu = current_cpu;

    icount = qemu_icount;
    if (cpu) {
        CPUArchState *env = cpu->env_ptr;
        if (!can_do_io(env)) {
            fprintf(stderr, "Bad clock read\n");
        }
        icount -= (env->icount_decr.u16.low + env->icount_extra);
    }
    return qemu_icount_bias + (icount << icount_time_shift);
}

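/*
 * Illustrative sketch (not part of the original file): the conversion in
 * cpu_get_icount() is a plain shift, so with icount_time_shift == 3 each
 * completed guest instruction accounts for 2^3 = 8 ns of virtual time,
 * i.e. roughly 125 MIPS.  The hypothetical helper below just restates the
 * function's last line for an arbitrary instruction count.
 */
#if 0 /* example only, kept out of the build */
static int64_t icount_to_virtual_ns(int64_t insns)
{
    /* e.g. insns = 1000, icount_time_shift = 3 -> bias + 8000 ns */
    return qemu_icount_bias + (insns << icount_time_shift);
}
#endif
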
/* return the host CPU cycle counter and handle stop/restart */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
{
    if (use_icount) {
        return cpu_get_icount();
    }
    if (!timers_state.cpu_ticks_enabled) {
        return timers_state.cpu_ticks_offset;
    } else {
        int64_t ticks;
        ticks = cpu_get_real_ticks();
        if (timers_state.cpu_ticks_prev > ticks) {
            /* Note: non-increasing ticks may happen if the host uses
               software suspend. */
            timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        }
        timers_state.cpu_ticks_prev = ticks;
        return ticks + timers_state.cpu_ticks_offset;
    }
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t ti;

    if (!timers_state.cpu_ticks_enabled) {
        ti = timers_state.cpu_clock_offset;
    } else {
        ti = get_clock();
        ti += timers_state.cpu_clock_offset;
    }

    return ti;
}

/* return the host CPU monotonic timer and handle stop/restart */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset = cpu_get_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle, real and virtual time will be aligned in
   the IO wait loop. */
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)

static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {
        return;
    }

    cur_time = cpu_get_clock();
    cur_icount = cpu_get_icount();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
        icount_time_shift++;
    }
    last_delta = delta;
    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
}

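/*
 * Worked example for icount_adjust() with made-up numbers: suppose
 * cur_icount has run 300 ms ahead of cur_time, so delta = +300 ms, and
 * last_delta was +100 ms.  With ICOUNT_WOBBLE = 100 ms, we have
 * last_delta + ICOUNT_WOBBLE = 200 ms < delta * 2 = 600 ms, so
 * icount_time_shift is decremented: each instruction now accounts for
 * fewer nanoseconds and virtual time slows down relative to the
 * instruction stream.  qemu_icount_bias is then recomputed so that
 * cpu_get_icount() stays continuous across the shift change.
 */
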
static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

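/*
 * Illustrative check of the rounding above: with icount_time_shift == 3
 * (8 ns per instruction), a 100 ns deadline gives
 * (100 + 8 - 1) >> 3 = 13 instructions, i.e. the budget is rounded up
 * (13 * 8 = 104 ns >= 100 ns) so execution does not stop short of the
 * deadline.  tcg_cpu_exec() below uses this value to prime
 * icount_decr/icount_extra.
 */
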
static void icount_warp_rt(void *opaque)
{
    if (vm_clock_warp_start == -1) {
        return;
    }

    if (runstate_is_running()) {
        int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        int64_t warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 1) {
            qemu_icount_bias += warp_delta;
        } else {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_time = cpu_get_clock();
            int64_t cur_icount = cpu_get_icount();
            int64_t delta = cur_time - cur_icount;
            qemu_icount_bias += MIN(warp_delta, delta);
        }
        if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        }
    }
    vm_clock_warp_start = -1;
}

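/*
 * Worked example for the adaptive (use_icount == 2) branch above, with
 * made-up numbers: if 50 ms of real time elapsed while the CPUs slept
 * (warp_delta = 50 ms) but cur_icount only trails cur_time by 10 ms
 * (delta = 10 ms), the bias advances by MIN(50 ms, 10 ms) = 10 ms.
 * Warping any further would push QEMU_CLOCK_VIRTUAL ahead of real time,
 * which icount_adjust() would then have to claw back.
 */
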
void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    assert(qtest_enabled());
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = MIN(dest - clock, deadline);
        qemu_icount_bias += warp;
        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_clock_warp(QEMUClockType type)
{
    int64_t deadline;

    /*
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks. But a clock argument removes the
     * need for if statements all over the place.
     */
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
        return;
    }

    /*
     * If the CPUs have been sleeping, advance the QEMU_CLOCK_VIRTUAL timer
     * now. This ensures that the deadline for the timer is computed correctly
     * below. This also makes sure that the insn counter is synchronized
     * before the CPU starts running, in case the CPU is woken by an event
     * other than the earliest QEMU_CLOCK_VIRTUAL timer.
     */
    icount_warp_rt(NULL);
    if (!all_cpu_threads_idle() || !qemu_clock_has_timers(QEMU_CLOCK_VIRTUAL)) {
        timer_del(icount_warp_timer);
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */
        return;
    }

    vm_clock_warp_start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    /* We want to use the earliest deadline from ALL vm_clocks */
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

    /* Maintain prior (possibly buggy) behaviour where if no deadline
     * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
     * INT32_MAX nanoseconds ahead, we still use INT32_MAX
     * nanoseconds.
     */
    if ((deadline < 0) || (deadline > INT32_MAX)) {
        deadline = INT32_MAX;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance
         * QEMU_CLOCK_VIRTUAL.
         *
         * An extreme solution for this problem would be to never let VCPUs
         * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
         * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
         * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
         * after some "real" time (related to the time left until the next
         * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
         * This avoids making the warps visible externally; for example,
         * you will not be sending network packets continuously instead of
         * every 100ms.
         */
        timer_mod(icount_warp_timer, vm_clock_warp_start + deadline);
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    }
};

void configure_icount(const char *option)
{
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    if (!option) {
        return;
    }

    icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
                                     icount_warp_rt, NULL);
    if (strcmp(option, "auto") != 0) {
        icount_time_shift = strtol(option, NULL, 0);
        use_icount = 1;
        return;
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers. */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        monitor_protocol_event(QEVENT_STOP, NULL);
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

static void cpu_signal(int sig)
{
    if (current_cpu) {
        cpu_exit(current_cpu);
    }
    exit_request = 1;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
                           void *ctx)
{
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {
        sigbus_reraise();
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
    struct timespec ts = { 0, 0 };
    siginfo_t siginfo;
    sigset_t waitset;
    sigset_t chkset;
    int r;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

    do {
        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");
            exit(1);
        }

        switch (r) {
        case SIGBUS:
            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
                sigbus_reraise();
            }
            break;
        default:
            break;
        }

        r = sigpending(&chkset);
        if (r == -1) {
            perror("sigpending");
            exit(1);
        }
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
}

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)
{
}

static void qemu_kvm_eat_signals(CPUState *cpu)
{
}
#endif /* !CONFIG_LINUX */

#ifndef _WIN32
static void dummy_signal(int sig)
{
}

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    int r;
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
    if (r) {
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
        exit(1);
    }
}

static void qemu_tcg_init_cpu_signals(void)
{
    sigset_t set;
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}

#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
    abort();
}

static void qemu_tcg_init_cpu_signals(void)
{
}
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static bool iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi.func = func;
    wi.data = data;
    wi.free = false;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
    } else {
        cpu->queued_work_last->next = &wi;
    }
    cpu->queued_work_last = &wi;
    wi.next = NULL;
    wi.done = false;

    qemu_cpu_kick(cpu);
    while (!wi.done) {
        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
    }
}

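/*
 * Usage sketch (illustrative only, not part of the original file): a
 * caller holding the BQL can hand work to a specific vCPU thread and
 * block until it has run there.  The callback and helper names are
 * hypothetical.
 */
#if 0 /* example only, kept out of the build */
static void example_set_halted(void *data)
{
    CPUState *cpu = data;

    cpu->halted = 1;    /* runs on cpu's own thread */
}

static void example_caller(CPUState *cpu)
{
    /* blocks until example_set_halted() has completed on cpu's thread */
    run_on_cpu(cpu, example_set_halted, cpu);
}
#endif
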
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {
        func(data);
        return;
    }

    wi = g_malloc0(sizeof(struct qemu_work_item));
    wi->func = func;
    wi->data = data;
    wi->free = true;
    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
    } else {
        cpu->queued_work_last->next = wi;
    }
    cpu->queued_work_last = wi;
    wi->next = NULL;
    wi->done = false;

    qemu_cpu_kick(cpu);
}

static void flush_queued_work(CPUState *cpu)
{
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {
        return;
    }

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
        wi->func(wi->data);
        wi->done = true;
        if (wi->free) {
            g_free(wi);
        }
    }
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_signal(&qemu_pause_cond);
    }
    flush_queued_work(cpu);
    cpu->thread_kicked = false;
}

static void qemu_tcg_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
           are idle. */
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
    }

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
}

static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    }

    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    current_cpu = cpu;
    while (1) {
        current_cpu = NULL;
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        current_cpu = cpu;
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (QTAILQ_FIRST(&cpus)->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            qemu_wait_io_event_common(cpu);
        }
    }

    while (1) {
        tcg_exec_all();

        if (use_icount) {
            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
        }
        qemu_tcg_wait_io_event();
    }

    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        CONTEXT tcgContext;

        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
         */
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
            continue;
        }

        cpu_signal(0);

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
                    GetLastError());
            exit(1);
        }
    }
#endif
}

void qemu_cpu_kick(CPUState *cpu)
{
    qemu_cond_broadcast(cpu->halt_cond);
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;
    }
}

void qemu_cpu_kick_self(void)
{
#ifndef _WIN32
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;
    }
#else
    abort();
#endif
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

static bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

void qemu_mutex_lock_iothread(void)
{
    if (!tcg_enabled()) {
        qemu_mutex_lock(&qemu_global_mutex);
    } else {
        iothread_requesting_mutex = true;
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        }
        iothread_requesting_mutex = false;
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
}

void qemu_mutex_unlock_iothread(void)
{
    qemu_mutex_unlock(&qemu_global_mutex);
}

static int all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return 0;
        }
    }

    return 1;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        cpu->stop = true;
        qemu_cpu_kick(cpu);
    }

    if (qemu_in_vcpu_thread()) {
        cpu_stop_current();
        if (!kvm_enabled()) {
            CPU_FOREACH(cpu) {
                cpu->stop = false;
                cpu->stopped = true;
            }
            return;
        }
    }

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        qemu_thread_create(cpu->thread, qemu_tcg_cpu_thread_fn, cpu,
                           QEMU_THREAD_JOINABLE);
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        }
        tcg_cpu_thread = cpu->thread;
    } else {
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
    }
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    qemu_thread_create(cpu->thread, qemu_kvm_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    qemu_thread_create(cpu->thread, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void qemu_init_vcpu(CPUState *cpu)
{
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state);
}

/* does a state transition even if the VM is already stopped,
   current state is forgotten forever */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

static int tcg_cpu_exec(CPUArchState *env)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    if (use_icount) {
        int64_t count;
        int64_t deadline;
        int decr;
        qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
        env->icount_decr.u16.low = 0;
        env->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        count = qemu_icount_round(deadline);
        qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        count -= decr;
        env->icount_decr.u16.low = decr;
        env->icount_extra = count;
    }
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    qemu_time += profile_getclock() - ti;
#endif
    if (use_icount) {
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        qemu_icount -= (env->icount_decr.u16.low
                        + env->icount_extra);
        env->icount_decr.u32 = 0;
        env->icount_extra = 0;
    }
    return ret;
}

static void tcg_exec_all(void)
{
    int r;

    /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    }
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
                break;
            }
        } else if (cpu->stop || cpu->stopped) {
            break;
        }
    }
    exit_request = 0;
}

void set_numa_modes(void)
{
    CPUState *cpu;
    int i;

    CPU_FOREACH(cpu) {
        for (i = 0; i < nb_numa_nodes; i++) {
            if (test_bit(cpu->cpu_index, node_cpumask[i])) {
                cpu->numa_node = i;
            }
        }
    }
}

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
#endif
}

CpuInfoList *qmp_query_cpus(Error **errp)
{
    CpuInfoList *head = NULL, *cur_item = NULL;
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        CpuInfoList *info;
#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#endif

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#endif

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                  "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_memory_rw_debug(cpu, addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_rw(addr, buf, l, 0);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
#if defined(TARGET_I386)
    CPUState *cs;

    CPU_FOREACH(cs) {
        X86CPU *cpu = X86_CPU(cs);
        CPUX86State *env = &cpu->env;

        if (!env->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
        } else {
            apic_deliver_nmi(env->apic_state);
        }
    }
#elif defined(TARGET_S390X)
    CPUState *cs;
    S390CPU *cpu;

    CPU_FOREACH(cs) {
        cpu = S390_CPU(cs);
        if (cpu->env.cpu_num == monitor_get_cpu_index()) {
            if (s390_cpu_restart(S390_CPU(cs)) == -1) {
                error_set(errp, QERR_UNSUPPORTED);
                return;
            }
            break;
        }
    }
#else
    error_set(errp, QERR_UNSUPPORTED);
#endif
}