]> git.proxmox.com Git - mirror_qemu.git/blob - cpus.c
Revert "audio: fix pc speaker init"
[mirror_qemu.git] / cpus.c
1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu/osdep.h"
26 #include "qemu/config-file.h"
27 #include "cpu.h"
28 #include "monitor/monitor.h"
29 #include "qapi/error.h"
30 #include "qapi/qapi-commands-misc.h"
31 #include "qapi/qapi-events-run-state.h"
32 #include "qapi/qmp/qerror.h"
33 #include "qemu/error-report.h"
34 #include "sysemu/sysemu.h"
35 #include "sysemu/block-backend.h"
36 #include "exec/gdbstub.h"
37 #include "sysemu/dma.h"
38 #include "sysemu/hw_accel.h"
39 #include "sysemu/kvm.h"
40 #include "sysemu/hax.h"
41 #include "sysemu/hvf.h"
42 #include "sysemu/whpx.h"
43 #include "exec/exec-all.h"
44
45 #include "qemu/thread.h"
46 #include "sysemu/cpus.h"
47 #include "sysemu/qtest.h"
48 #include "qemu/main-loop.h"
49 #include "qemu/option.h"
50 #include "qemu/bitmap.h"
51 #include "qemu/seqlock.h"
52 #include "tcg.h"
53 #include "hw/nmi.h"
54 #include "sysemu/replay.h"
55 #include "hw/boards.h"
56
/*
 * Linux only: bring in prctl() and provide fallback values for the
 * PR_MCE_KILL* constants when the system headers do not define them.
 * They are passed to prctl() by the Linux qemu_init_sigbus().
 */
57 #ifdef CONFIG_LINUX
58
59 #include <sys/prctl.h>
60
61 #ifndef PR_MCE_KILL
62 #define PR_MCE_KILL 33
63 #endif
64
65 #ifndef PR_MCE_KILL_SET
66 #define PR_MCE_KILL_SET 1
67 #endif
68
69 #ifndef PR_MCE_KILL_EARLY
70 #define PR_MCE_KILL_EARLY 1
71 #endif
72
73 #endif /* CONFIG_LINUX */
74
/* NOTE(review): max_delay/max_advance are not referenced in the visible
 * part of this file; their users presumably live elsewhere - confirm. */
75 int64_t max_delay;
76 int64_t max_advance;
77
78 /* vcpu throttling controls */
/* Timer that runs cpu_throttle_timer_tick(); created in cpu_ticks_init(). */
79 static QEMUTimer *throttle_timer;
/* Current throttle percentage; 0 means throttling is off. Accessed with
 * atomic_set()/atomic_read() by the cpu_throttle_*() functions. */
80 static unsigned int throttle_percentage;
81
/* cpu_throttle_set() clamps its argument to [PCT_MIN, PCT_MAX]. */
82 #define CPU_THROTTLE_PCT_MIN 1
83 #define CPU_THROTTLE_PCT_MAX 99
/* Base timeslice in nanoseconds (10 ms) used to derive sleep and
 * timer periods for throttling. */
84 #define CPU_THROTTLE_TIMESLICE_NS 10000000
85
86 bool cpu_is_stopped(CPUState *cpu)
87 {
88 return cpu->stopped || !runstate_is_running();
89 }
90
91 static bool cpu_thread_is_idle(CPUState *cpu)
92 {
93 if (cpu->stop || cpu->queued_work_first) {
94 return false;
95 }
96 if (cpu_is_stopped(cpu)) {
97 return true;
98 }
99 if (!cpu->halted || cpu_has_work(cpu) ||
100 kvm_halt_in_kernel()) {
101 return false;
102 }
103 return true;
104 }
105
106 static bool all_cpu_threads_idle(void)
107 {
108 CPUState *cpu;
109
110 CPU_FOREACH(cpu) {
111 if (!cpu_thread_is_idle(cpu)) {
112 return false;
113 }
114 }
115 return true;
116 }
117
118 /***********************************************************/
119 /* guest cycle counter */
120
121 /* Protected by TimersState seqlock */
122
/* Set from the "sleep" option in configure_icount(). When false,
 * qemu_start_warp_timer() advances the virtual clock directly to the
 * next deadline instead of arming the warp timer. */
123 static bool icount_sleep = true;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
/* Upper bound for icount_time_shift when icount_adjust() raises it. */
125 #define MAX_ICOUNT_SHIFT 10
126
/*
 * State of the emulated tick counter, the VM clock and the instruction
 * counter (icount) maintained by this file.
 */
127 typedef struct TimersState {
128 /* Protected by BQL. */
129 int64_t cpu_ticks_prev;
130 int64_t cpu_ticks_offset;
131
132 /* Protect fields that can be respectively read outside the
133 * BQL, and written from multiple threads.
134 */
135 QemuSeqLock vm_clock_seqlock;
136 QemuSpin vm_clock_lock;
137
/* Set to 1 by cpu_enable_ticks() and to 0 by cpu_disable_ticks(). */
138 int16_t cpu_ticks_enabled;
139
140 /* Conversion factor from emulated instructions to virtual clock ticks. */
141 int16_t icount_time_shift;
142
143 /* Compensate for varying guest execution speed. */
144 int64_t qemu_icount_bias;
145
/* Start time of a pending clock warp; icount_warp_rt() treats -1 as
 * "no warp pending" and resets the field to -1 when done. */
146 int64_t vm_clock_warp_start;
/* Offset added to get_clock() by cpu_get_clock_locked(). */
147 int64_t cpu_clock_offset;
148
149 /* Only written by TCG thread */
150 int64_t qemu_icount;
151
152 /* for adjusting icount */
153 QEMUTimer *icount_rt_timer;
154 QEMUTimer *icount_vm_timer;
155 QEMUTimer *icount_warp_timer;
156 } TimersState;
157
/* Single file-wide instance of the timer/icount state. */
158 static TimersState timers_state;
/* Whether multi-threaded TCG is in use; set by qemu_tcg_configure(). */
159 bool mttcg_enabled;
160
161 /*
162 * We default to false if we know other options have been enabled
163 * which are currently incompatible with MTTCG. Otherwise when each
164 * guest (target) has been updated to support:
165 * - atomic instructions
166 * - memory ordering primitives (barriers)
167 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
168 *
169 * Once a guest architecture has been converted to the new primitives
170 * there are two remaining limitations to check.
171 *
172 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
173 * - The host must have a stronger memory order than the guest
174 *
175 * It may be possible in future to support strong guests on weak hosts
176 * but that will require tagging all load/stores in a guest with their
177 * implicit memory order requirements which would likely slow things
178 * down a lot.
179 */
180
/*
 * Return true when every memory-ordering bit required by the guest
 * (TCG_GUEST_DEFAULT_MO) is also set in TCG_TARGET_DEFAULT_MO.
 * If either macro is undefined the answer is false.
 */
static bool check_tcg_memory_orders_compatible(void)
{
    bool compatible = false;

#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    compatible = !(TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO);
#endif
    return compatible;
}
189
190 static bool default_mttcg_enabled(void)
191 {
192 if (use_icount || TCG_OVERSIZED_GUEST) {
193 return false;
194 } else {
195 #ifdef TARGET_SUPPORTS_MTTCG
196 return check_tcg_memory_orders_compatible();
197 #else
198 return false;
199 #endif
200 }
201 }
202
203 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
204 {
205 const char *t = qemu_opt_get(opts, "thread");
206 if (t) {
207 if (strcmp(t, "multi") == 0) {
208 if (TCG_OVERSIZED_GUEST) {
209 error_setg(errp, "No MTTCG when guest word size > hosts");
210 } else if (use_icount) {
211 error_setg(errp, "No MTTCG when icount is enabled");
212 } else {
213 #ifndef TARGET_SUPPORTS_MTTCG
214 warn_report("Guest not yet converted to MTTCG - "
215 "you may get unexpected results");
216 #endif
217 if (!check_tcg_memory_orders_compatible()) {
218 warn_report("Guest expects a stronger memory ordering "
219 "than the host provides");
220 error_printf("This may cause strange/hard to debug errors\n");
221 }
222 mttcg_enabled = true;
223 }
224 } else if (strcmp(t, "single") == 0) {
225 mttcg_enabled = false;
226 } else {
227 error_setg(errp, "Invalid 'thread' setting %s", t);
228 }
229 } else {
230 mttcg_enabled = default_mttcg_enabled();
231 }
232 }
233
234 /* The current number of executed instructions is based on what we
235 * originally budgeted minus the current state of the decrementing
236 * icount counters in extra/u16.low.
237 */
238 static int64_t cpu_get_icount_executed(CPUState *cpu)
239 {
240 return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
241 }
242
243 /*
244 * Update the global shared timer_state.qemu_icount to take into
245 * account executed instructions. This is done by the TCG vCPU
246 * thread so the main-loop can see time has moved forward.
247 */
248 static void cpu_update_icount_locked(CPUState *cpu)
249 {
250 int64_t executed = cpu_get_icount_executed(cpu);
251 cpu->icount_budget -= executed;
252
253 atomic_set_i64(&timers_state.qemu_icount,
254 timers_state.qemu_icount + executed);
255 }
256
257 /*
258 * Update the global shared timer_state.qemu_icount to take into
259 * account executed instructions. This is done by the TCG vCPU
260 * thread so the main-loop can see time has moved forward.
261 */
262 void cpu_update_icount(CPUState *cpu)
263 {
264 seqlock_write_lock(&timers_state.vm_clock_seqlock,
265 &timers_state.vm_clock_lock);
266 cpu_update_icount_locked(cpu);
267 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
268 &timers_state.vm_clock_lock);
269 }
270
271 static int64_t cpu_get_icount_raw_locked(void)
272 {
273 CPUState *cpu = current_cpu;
274
275 if (cpu && cpu->running) {
276 if (!cpu->can_do_io) {
277 error_report("Bad icount read");
278 exit(1);
279 }
280 /* Take into account what has run */
281 cpu_update_icount_locked(cpu);
282 }
283 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
284 return atomic_read_i64(&timers_state.qemu_icount);
285 }
286
287 static int64_t cpu_get_icount_locked(void)
288 {
289 int64_t icount = cpu_get_icount_raw_locked();
290 return atomic_read_i64(&timers_state.qemu_icount_bias) +
291 cpu_icount_to_ns(icount);
292 }
293
294 int64_t cpu_get_icount_raw(void)
295 {
296 int64_t icount;
297 unsigned start;
298
299 do {
300 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
301 icount = cpu_get_icount_raw_locked();
302 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
303
304 return icount;
305 }
306
307 /* Return the virtual CPU time, based on the instruction counter. */
308 int64_t cpu_get_icount(void)
309 {
310 int64_t icount;
311 unsigned start;
312
313 do {
314 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
315 icount = cpu_get_icount_locked();
316 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
317
318 return icount;
319 }
320
321 int64_t cpu_icount_to_ns(int64_t icount)
322 {
323 return icount << atomic_read(&timers_state.icount_time_shift);
324 }
325
326 static int64_t cpu_get_ticks_locked(void)
327 {
328 int64_t ticks = timers_state.cpu_ticks_offset;
329 if (timers_state.cpu_ticks_enabled) {
330 ticks += cpu_get_host_ticks();
331 }
332
333 if (timers_state.cpu_ticks_prev > ticks) {
334 /* Non increasing ticks may happen if the host uses software suspend. */
335 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
336 ticks = timers_state.cpu_ticks_prev;
337 }
338
339 timers_state.cpu_ticks_prev = ticks;
340 return ticks;
341 }
342
343 /* return the time elapsed in VM between vm_start and vm_stop. Unless
344 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
345 * counter.
346 */
347 int64_t cpu_get_ticks(void)
348 {
349 int64_t ticks;
350
351 if (use_icount) {
352 return cpu_get_icount();
353 }
354
355 qemu_spin_lock(&timers_state.vm_clock_lock);
356 ticks = cpu_get_ticks_locked();
357 qemu_spin_unlock(&timers_state.vm_clock_lock);
358 return ticks;
359 }
360
361 static int64_t cpu_get_clock_locked(void)
362 {
363 int64_t time;
364
365 time = timers_state.cpu_clock_offset;
366 if (timers_state.cpu_ticks_enabled) {
367 time += get_clock();
368 }
369
370 return time;
371 }
372
373 /* Return the monotonic time elapsed in VM, i.e.,
374 * the time between vm_start and vm_stop
375 */
376 int64_t cpu_get_clock(void)
377 {
378 int64_t ti;
379 unsigned start;
380
381 do {
382 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
383 ti = cpu_get_clock_locked();
384 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
385
386 return ti;
387 }
388
389 /* enable cpu_get_ticks()
390 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
391 */
392 void cpu_enable_ticks(void)
393 {
394 seqlock_write_lock(&timers_state.vm_clock_seqlock,
395 &timers_state.vm_clock_lock);
396 if (!timers_state.cpu_ticks_enabled) {
397 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
398 timers_state.cpu_clock_offset -= get_clock();
399 timers_state.cpu_ticks_enabled = 1;
400 }
401 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
402 &timers_state.vm_clock_lock);
403 }
404
405 /* disable cpu_get_ticks() : the clock is stopped. You must not call
406 * cpu_get_ticks() after that.
407 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
408 */
409 void cpu_disable_ticks(void)
410 {
411 seqlock_write_lock(&timers_state.vm_clock_seqlock,
412 &timers_state.vm_clock_lock);
413 if (timers_state.cpu_ticks_enabled) {
414 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
415 timers_state.cpu_clock_offset = cpu_get_clock_locked();
416 timers_state.cpu_ticks_enabled = 0;
417 }
418 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
419 &timers_state.vm_clock_lock);
420 }
421
422 /* Correlation between real and virtual time is always going to be
423 fairly approximate, so ignore small variation.
424 When the guest is idle real and virtual time will be aligned in
425 the IO wait loop. */
426 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
427
428 static void icount_adjust(void)
429 {
430 int64_t cur_time;
431 int64_t cur_icount;
432 int64_t delta;
433
434 /* Protected by TimersState mutex. */
435 static int64_t last_delta;
436
437 /* If the VM is not running, then do nothing. */
438 if (!runstate_is_running()) {
439 return;
440 }
441
442 seqlock_write_lock(&timers_state.vm_clock_seqlock,
443 &timers_state.vm_clock_lock);
444 cur_time = cpu_get_clock_locked();
445 cur_icount = cpu_get_icount_locked();
446
447 delta = cur_icount - cur_time;
448 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
449 if (delta > 0
450 && last_delta + ICOUNT_WOBBLE < delta * 2
451 && timers_state.icount_time_shift > 0) {
452 /* The guest is getting too far ahead. Slow time down. */
453 atomic_set(&timers_state.icount_time_shift,
454 timers_state.icount_time_shift - 1);
455 }
456 if (delta < 0
457 && last_delta - ICOUNT_WOBBLE > delta * 2
458 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
459 /* The guest is getting too far behind. Speed time up. */
460 atomic_set(&timers_state.icount_time_shift,
461 timers_state.icount_time_shift + 1);
462 }
463 last_delta = delta;
464 atomic_set_i64(&timers_state.qemu_icount_bias,
465 cur_icount - (timers_state.qemu_icount
466 << timers_state.icount_time_shift));
467 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
468 &timers_state.vm_clock_lock);
469 }
470
471 static void icount_adjust_rt(void *opaque)
472 {
473 timer_mod(timers_state.icount_rt_timer,
474 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
475 icount_adjust();
476 }
477
478 static void icount_adjust_vm(void *opaque)
479 {
480 timer_mod(timers_state.icount_vm_timer,
481 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
482 NANOSECONDS_PER_SECOND / 10);
483 icount_adjust();
484 }
485
486 static int64_t qemu_icount_round(int64_t count)
487 {
488 int shift = atomic_read(&timers_state.icount_time_shift);
489 return (count + (1 << shift) - 1) >> shift;
490 }
491
492 static void icount_warp_rt(void)
493 {
494 unsigned seq;
495 int64_t warp_start;
496
497 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
498 * changes from -1 to another value, so the race here is okay.
499 */
500 do {
501 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
502 warp_start = timers_state.vm_clock_warp_start;
503 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
504
505 if (warp_start == -1) {
506 return;
507 }
508
509 seqlock_write_lock(&timers_state.vm_clock_seqlock,
510 &timers_state.vm_clock_lock);
511 if (runstate_is_running()) {
512 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
513 cpu_get_clock_locked());
514 int64_t warp_delta;
515
516 warp_delta = clock - timers_state.vm_clock_warp_start;
517 if (use_icount == 2) {
518 /*
519 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
520 * far ahead of real time.
521 */
522 int64_t cur_icount = cpu_get_icount_locked();
523 int64_t delta = clock - cur_icount;
524 warp_delta = MIN(warp_delta, delta);
525 }
526 atomic_set_i64(&timers_state.qemu_icount_bias,
527 timers_state.qemu_icount_bias + warp_delta);
528 }
529 timers_state.vm_clock_warp_start = -1;
530 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
531 &timers_state.vm_clock_lock);
532
533 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
534 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
535 }
536 }
537
/* Callback of icount_warp_timer: simply accounts the pending warp.
 * @opaque is unused. */
static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
545
546 void qtest_clock_warp(int64_t dest)
547 {
548 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
549 AioContext *aio_context;
550 assert(qtest_enabled());
551 aio_context = qemu_get_aio_context();
552 while (clock < dest) {
553 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
554 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
555
556 seqlock_write_lock(&timers_state.vm_clock_seqlock,
557 &timers_state.vm_clock_lock);
558 atomic_set_i64(&timers_state.qemu_icount_bias,
559 timers_state.qemu_icount_bias + warp);
560 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
561 &timers_state.vm_clock_lock);
562
563 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
564 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
565 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
566 }
567 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
568 }
569
570 void qemu_start_warp_timer(void)
571 {
572 int64_t clock;
573 int64_t deadline;
574
575 if (!use_icount) {
576 return;
577 }
578
579 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
580 * do not fire, so computing the deadline does not make sense.
581 */
582 if (!runstate_is_running()) {
583 return;
584 }
585
586 if (replay_mode != REPLAY_MODE_PLAY) {
587 if (!all_cpu_threads_idle()) {
588 return;
589 }
590
591 if (qtest_enabled()) {
592 /* When testing, qtest commands advance icount. */
593 return;
594 }
595
596 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
597 } else {
598 /* warp clock deterministically in record/replay mode */
599 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
600 /* vCPU is sleeping and warp can't be started.
601 It is probably a race condition: notification sent
602 to vCPU was processed in advance and vCPU went to sleep.
603 Therefore we have to wake it up for doing someting. */
604 if (replay_has_checkpoint()) {
605 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
606 }
607 return;
608 }
609 }
610
611 /* We want to use the earliest deadline from ALL vm_clocks */
612 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
613 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
614 if (deadline < 0) {
615 static bool notified;
616 if (!icount_sleep && !notified) {
617 warn_report("icount sleep disabled and no active timers");
618 notified = true;
619 }
620 return;
621 }
622
623 if (deadline > 0) {
624 /*
625 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
626 * sleep. Otherwise, the CPU might be waiting for a future timer
627 * interrupt to wake it up, but the interrupt never comes because
628 * the vCPU isn't running any insns and thus doesn't advance the
629 * QEMU_CLOCK_VIRTUAL.
630 */
631 if (!icount_sleep) {
632 /*
633 * We never let VCPUs sleep in no sleep icount mode.
634 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
635 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
636 * It is useful when we want a deterministic execution time,
637 * isolated from host latencies.
638 */
639 seqlock_write_lock(&timers_state.vm_clock_seqlock,
640 &timers_state.vm_clock_lock);
641 atomic_set_i64(&timers_state.qemu_icount_bias,
642 timers_state.qemu_icount_bias + deadline);
643 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
644 &timers_state.vm_clock_lock);
645 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
646 } else {
647 /*
648 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
649 * "real" time, (related to the time left until the next event) has
650 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
651 * This avoids that the warps are visible externally; for example,
652 * you will not be sending network packets continuously instead of
653 * every 100ms.
654 */
655 seqlock_write_lock(&timers_state.vm_clock_seqlock,
656 &timers_state.vm_clock_lock);
657 if (timers_state.vm_clock_warp_start == -1
658 || timers_state.vm_clock_warp_start > clock) {
659 timers_state.vm_clock_warp_start = clock;
660 }
661 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
662 &timers_state.vm_clock_lock);
663 timer_mod_anticipate(timers_state.icount_warp_timer,
664 clock + deadline);
665 }
666 } else if (deadline == 0) {
667 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
668 }
669 }
670
671 static void qemu_account_warp_timer(void)
672 {
673 if (!use_icount || !icount_sleep) {
674 return;
675 }
676
677 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
678 * do not fire, so computing the deadline does not make sense.
679 */
680 if (!runstate_is_running()) {
681 return;
682 }
683
684 /* warp clock deterministically in record/replay mode */
685 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
686 return;
687 }
688
689 timer_del(timers_state.icount_warp_timer);
690 icount_warp_rt();
691 }
692
693 static bool icount_state_needed(void *opaque)
694 {
695 return use_icount;
696 }
697
698 static bool warp_timer_state_needed(void *opaque)
699 {
700 TimersState *s = opaque;
701 return s->icount_warp_timer != NULL;
702 }
703
704 static bool adjust_timers_state_needed(void *opaque)
705 {
706 TimersState *s = opaque;
707 return s->icount_rt_timer != NULL;
708 }
709
710 /*
711 * Subsection for warp timer migration is optional, because may not be created
712 */
/* Carries vm_clock_warp_start and icount_warp_timer; included only when
 * warp_timer_state_needed() returns true. */
713 static const VMStateDescription icount_vmstate_warp_timer = {
714 .name = "timer/icount/warp_timer",
715 .version_id = 1,
716 .minimum_version_id = 1,
717 .needed = warp_timer_state_needed,
718 .fields = (VMStateField[]) {
719 VMSTATE_INT64(vm_clock_warp_start, TimersState),
720 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
721 VMSTATE_END_OF_LIST()
722 }
723 };
724
/* Description of the two icount adjustment timers (icount_rt_timer and
 * icount_vm_timer); included only when adjust_timers_state_needed()
 * returns true. */
725 static const VMStateDescription icount_vmstate_adjust_timers = {
726 .name = "timer/icount/timers",
727 .version_id = 1,
728 .minimum_version_id = 1,
729 .needed = adjust_timers_state_needed,
730 .fields = (VMStateField[]) {
731 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
732 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
733 VMSTATE_END_OF_LIST()
734 }
735 };
736
737 /*
738 * This is a subsection for icount migration.
739 */
/* Carries qemu_icount_bias and qemu_icount, with the warp-timer and
 * adjust-timers descriptions as nested subsections; included only when
 * icount_state_needed() returns true. */
740 static const VMStateDescription icount_vmstate_timers = {
741 .name = "timer/icount",
742 .version_id = 1,
743 .minimum_version_id = 1,
744 .needed = icount_state_needed,
745 .fields = (VMStateField[]) {
746 VMSTATE_INT64(qemu_icount_bias, TimersState),
747 VMSTATE_INT64(qemu_icount, TimersState),
748 VMSTATE_END_OF_LIST()
749 },
750 .subsections = (const VMStateDescription*[]) {
751 &icount_vmstate_warp_timer,
752 &icount_vmstate_adjust_timers,
753 NULL
754 }
755 };
756
/* Top-level description of TimersState, registered against
 * timers_state in cpu_ticks_init(). The icount state hangs off it as a
 * subsection. */
757 static const VMStateDescription vmstate_timers = {
758 .name = "timer",
759 .version_id = 2,
760 .minimum_version_id = 1,
761 .fields = (VMStateField[]) {
762 VMSTATE_INT64(cpu_ticks_offset, TimersState),
/* NOTE(review): 8 unused bytes - presumably a placeholder for a field
 * that no longer exists; confirm against the migration history. */
763 VMSTATE_UNUSED(8),
/* NOTE(review): the "_V(..., 2)" form presumably limits this field to
 * stream version 2 and later - confirm. */
764 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
765 VMSTATE_END_OF_LIST()
766 },
767 .subsections = (const VMStateDescription*[]) {
768 &icount_vmstate_timers,
769 NULL
770 }
771 };
772
773 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
774 {
775 double pct;
776 double throttle_ratio;
777 long sleeptime_ns;
778
779 if (!cpu_throttle_get_percentage()) {
780 return;
781 }
782
783 pct = (double)cpu_throttle_get_percentage()/100;
784 throttle_ratio = pct / (1 - pct);
785 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
786
787 qemu_mutex_unlock_iothread();
788 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
789 qemu_mutex_lock_iothread();
790 atomic_set(&cpu->throttle_thread_scheduled, 0);
791 }
792
793 static void cpu_throttle_timer_tick(void *opaque)
794 {
795 CPUState *cpu;
796 double pct;
797
798 /* Stop the timer if needed */
799 if (!cpu_throttle_get_percentage()) {
800 return;
801 }
802 CPU_FOREACH(cpu) {
803 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
804 async_run_on_cpu(cpu, cpu_throttle_thread,
805 RUN_ON_CPU_NULL);
806 }
807 }
808
809 pct = (double)cpu_throttle_get_percentage()/100;
810 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
811 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
812 }
813
814 void cpu_throttle_set(int new_throttle_pct)
815 {
816 /* Ensure throttle percentage is within valid range */
817 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
818 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
819
820 atomic_set(&throttle_percentage, new_throttle_pct);
821
822 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
823 CPU_THROTTLE_TIMESLICE_NS);
824 }
825
826 void cpu_throttle_stop(void)
827 {
828 atomic_set(&throttle_percentage, 0);
829 }
830
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int percentage = cpu_throttle_get_percentage();

    return percentage != 0;
}
835
836 int cpu_throttle_get_percentage(void)
837 {
838 return atomic_read(&throttle_percentage);
839 }
840
841 void cpu_ticks_init(void)
842 {
843 seqlock_init(&timers_state.vm_clock_seqlock);
844 qemu_spin_init(&timers_state.vm_clock_lock);
845 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
846 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
847 cpu_throttle_timer_tick, NULL);
848 }
849
850 void configure_icount(QemuOpts *opts, Error **errp)
851 {
852 const char *option;
853 char *rem_str = NULL;
854
855 option = qemu_opt_get(opts, "shift");
856 if (!option) {
857 if (qemu_opt_get(opts, "align") != NULL) {
858 error_setg(errp, "Please specify shift option when using align");
859 }
860 return;
861 }
862
863 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
864 if (icount_sleep) {
865 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
866 icount_timer_cb, NULL);
867 }
868
869 icount_align_option = qemu_opt_get_bool(opts, "align", false);
870
871 if (icount_align_option && !icount_sleep) {
872 error_setg(errp, "align=on and sleep=off are incompatible");
873 }
874 if (strcmp(option, "auto") != 0) {
875 errno = 0;
876 timers_state.icount_time_shift = strtol(option, &rem_str, 0);
877 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
878 error_setg(errp, "icount: Invalid shift value");
879 }
880 use_icount = 1;
881 return;
882 } else if (icount_align_option) {
883 error_setg(errp, "shift=auto and align=on are incompatible");
884 } else if (!icount_sleep) {
885 error_setg(errp, "shift=auto and sleep=off are incompatible");
886 }
887
888 use_icount = 2;
889
890 /* 125MIPS seems a reasonable initial guess at the guest speed.
891 It will be corrected fairly quickly anyway. */
892 timers_state.icount_time_shift = 3;
893
894 /* Have both realtime and virtual time triggers for speed adjustment.
895 The realtime trigger catches emulated time passing too slowly,
896 the virtual time trigger catches emulated time passing too fast.
897 Realtime triggers occur even when idle, so use them less frequently
898 than VM triggers. */
899 timers_state.vm_clock_warp_start = -1;
900 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
901 icount_adjust_rt, NULL);
902 timer_mod(timers_state.icount_rt_timer,
903 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
904 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
905 icount_adjust_vm, NULL);
906 timer_mod(timers_state.icount_vm_timer,
907 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
908 NANOSECONDS_PER_SECOND / 10);
909 }
910
911 /***********************************************************/
912 /* TCG vCPU kick timer
913 *
914 * The kick timer is responsible for moving single threaded vCPU
915 * emulation on to the next vCPU. If more than one vCPU is running a
916 * timer event will force a cpu->exit so the next vCPU can get
917 * scheduled.
918 *
919 * The timer is removed if all vCPUs are idle and restarted again once
920 * idleness is complete.
921 */
922
/* Created lazily by start_tcg_kick_timer(); stays NULL until then. */
923 static QEMUTimer *tcg_kick_vcpu_timer;
/* vCPU that qemu_cpu_kick_rr_cpu() kicks; NOTE(review): its writer is
 * outside the visible part of this file. */
924 static CPUState *tcg_current_rr_cpu;
925
/* Interval between kicks: a tenth of a second, in nanoseconds. */
926 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
927
928 static inline int64_t qemu_tcg_next_kick(void)
929 {
930 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
931 }
932
933 /* Kick the currently round-robin scheduled vCPU */
934 static void qemu_cpu_kick_rr_cpu(void)
935 {
936 CPUState *cpu;
937 do {
938 cpu = atomic_mb_read(&tcg_current_rr_cpu);
939 if (cpu) {
940 cpu_exit(cpu);
941 }
942 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
943 }
944
945 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
946 {
947 }
948
949 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
950 {
951 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
952 qemu_notify_event();
953 return;
954 }
955
956 if (qemu_in_vcpu_thread()) {
957 /* A CPU is currently running; kick it back out to the
958 * tcg_cpu_exec() loop so it will recalculate its
959 * icount deadline immediately.
960 */
961 qemu_cpu_kick(current_cpu);
962 } else if (first_cpu) {
963 /* qemu_cpu_kick is not enough to kick a halted CPU out of
964 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
965 * causes cpu_thread_is_idle to return false. This way,
966 * handle_icount_deadline can run.
967 * If we have no CPUs at all for some reason, we don't
968 * need to do anything.
969 */
970 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
971 }
972 }
973
974 static void kick_tcg_thread(void *opaque)
975 {
976 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
977 qemu_cpu_kick_rr_cpu();
978 }
979
980 static void start_tcg_kick_timer(void)
981 {
982 assert(!mttcg_enabled);
983 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
984 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
985 kick_tcg_thread, NULL);
986 }
987 if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
988 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
989 }
990 }
991
992 static void stop_tcg_kick_timer(void)
993 {
994 assert(!mttcg_enabled);
995 if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
996 timer_del(tcg_kick_vcpu_timer);
997 }
998 }
999
1000 /***********************************************************/
1001 void hw_error(const char *fmt, ...)
1002 {
1003 va_list ap;
1004 CPUState *cpu;
1005
1006 va_start(ap, fmt);
1007 fprintf(stderr, "qemu: hardware error: ");
1008 vfprintf(stderr, fmt, ap);
1009 fprintf(stderr, "\n");
1010 CPU_FOREACH(cpu) {
1011 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
1012 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
1013 }
1014 va_end(ap);
1015 abort();
1016 }
1017
1018 void cpu_synchronize_all_states(void)
1019 {
1020 CPUState *cpu;
1021
1022 CPU_FOREACH(cpu) {
1023 cpu_synchronize_state(cpu);
1024 /* TODO: move to cpu_synchronize_state() */
1025 if (hvf_enabled()) {
1026 hvf_cpu_synchronize_state(cpu);
1027 }
1028 }
1029 }
1030
1031 void cpu_synchronize_all_post_reset(void)
1032 {
1033 CPUState *cpu;
1034
1035 CPU_FOREACH(cpu) {
1036 cpu_synchronize_post_reset(cpu);
1037 /* TODO: move to cpu_synchronize_post_reset() */
1038 if (hvf_enabled()) {
1039 hvf_cpu_synchronize_post_reset(cpu);
1040 }
1041 }
1042 }
1043
1044 void cpu_synchronize_all_post_init(void)
1045 {
1046 CPUState *cpu;
1047
1048 CPU_FOREACH(cpu) {
1049 cpu_synchronize_post_init(cpu);
1050 /* TODO: move to cpu_synchronize_post_init() */
1051 if (hvf_enabled()) {
1052 hvf_cpu_synchronize_post_init(cpu);
1053 }
1054 }
1055 }
1056
1057 void cpu_synchronize_all_pre_loadvm(void)
1058 {
1059 CPUState *cpu;
1060
1061 CPU_FOREACH(cpu) {
1062 cpu_synchronize_pre_loadvm(cpu);
1063 }
1064 }
1065
1066 static int do_vm_stop(RunState state, bool send_stop)
1067 {
1068 int ret = 0;
1069
1070 if (runstate_is_running()) {
1071 cpu_disable_ticks();
1072 pause_all_vcpus();
1073 runstate_set(state);
1074 vm_state_notify(0, state);
1075 if (send_stop) {
1076 qapi_event_send_stop();
1077 }
1078 }
1079
1080 bdrv_drain_all();
1081 replay_disable_events();
1082 ret = bdrv_flush_all();
1083
1084 return ret;
1085 }
1086
1087 /* Special vm_stop() variant for terminating the process. Historically clients
1088 * did not expect a QMP STOP event and so we need to retain compatibility.
1089 */
1090 int vm_shutdown(void)
1091 {
1092 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1093 }
1094
1095 static bool cpu_can_run(CPUState *cpu)
1096 {
1097 if (cpu->stop) {
1098 return false;
1099 }
1100 if (cpu_is_stopped(cpu)) {
1101 return false;
1102 }
1103 return true;
1104 }
1105
/*
 * Called by the vCPU thread loops in this file when executing @cpu returned
 * EXCP_DEBUG.  Passes @cpu to gdb_set_stop_cpu(), issues a system debug
 * request and marks the vCPU as stopped.
 */
1106 static void cpu_handle_guest_debug(CPUState *cpu)
1107 {
1108 gdb_set_stop_cpu(cpu);
1109 qemu_system_debug_request();
1110 cpu->stopped = true;
1111 }
1112
1113 #ifdef CONFIG_LINUX
1114 static void sigbus_reraise(void)
1115 {
1116 sigset_t set;
1117 struct sigaction action;
1118
1119 memset(&action, 0, sizeof(action));
1120 action.sa_handler = SIG_DFL;
1121 if (!sigaction(SIGBUS, &action, NULL)) {
1122 raise(SIGBUS);
1123 sigemptyset(&set);
1124 sigaddset(&set, SIGBUS);
1125 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1126 }
1127 perror("Failed to re-raise SIGBUS!\n");
1128 abort();
1129 }
1130
1131 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1132 {
1133 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1134 sigbus_reraise();
1135 }
1136
1137 if (current_cpu) {
1138 /* Called asynchronously in VCPU thread. */
1139 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1140 sigbus_reraise();
1141 }
1142 } else {
1143 /* Called synchronously (via signalfd) in main thread. */
1144 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1145 sigbus_reraise();
1146 }
1147 }
1148 }
1149
1150 static void qemu_init_sigbus(void)
1151 {
1152 struct sigaction action;
1153
1154 memset(&action, 0, sizeof(action));
1155 action.sa_flags = SA_SIGINFO;
1156 action.sa_sigaction = sigbus_handler;
1157 sigaction(SIGBUS, &action, NULL);
1158
1159 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1160 }
1161 #else /* !CONFIG_LINUX */
/* Without CONFIG_LINUX no SIGBUS handling is set up: intentionally empty. */
1162 static void qemu_init_sigbus(void)
1163 {
1164 }
1165 #endif /* !CONFIG_LINUX */
1166
/*
 * Mutex taken by qemu_mutex_lock_iothread_impl() and released by
 * qemu_mutex_unlock_iothread(); also the mutex paired with the condition
 * variables below.
 */
1167 static QemuMutex qemu_global_mutex;
1168
/* Set to the calling thread by qemu_init_cpu_loop(). */
1169 static QemuThread io_thread;
1170
1171 /* cpu creation */
/* Signalled whenever a vCPU thread changes cpu->created. */
1172 static QemuCond qemu_cpu_cond;
1173 /* system init */
/* Broadcast by qemu_cpu_stop(); pause_all_vcpus() waits on it. */
1174 static QemuCond qemu_pause_cond;
1175
1176 void qemu_init_cpu_loop(void)
1177 {
1178 qemu_init_sigbus();
1179 qemu_cond_init(&qemu_cpu_cond);
1180 qemu_cond_init(&qemu_pause_cond);
1181 qemu_mutex_init(&qemu_global_mutex);
1182
1183 qemu_thread_get_self(&io_thread);
1184 }
1185
/*
 * Forward @func and @data for @cpu to do_run_on_cpu(), supplying the global
 * mutex (qemu_global_mutex) as the lock argument.
 */
1186 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1187 {
1188 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1189 }
1190
1191 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1192 {
1193 if (kvm_destroy_vcpu(cpu) < 0) {
1194 error_report("kvm_destroy_vcpu failed");
1195 exit(EXIT_FAILURE);
1196 }
1197 }
1198
/*
 * TCG counterpart of qemu_kvm_destroy_vcpu(); currently there is nothing to
 * tear down, so the body is empty.
 */
1199 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1200 {
1201 }
1202
/*
 * Move @cpu from "stop requested" to "stopped".  Must be called from the
 * thread that runs @cpu (asserted).
 *
 * @exit: when true, cpu_exit() is additionally called on @cpu.
 *
 * qemu_pause_cond is broadcast afterwards so that waiters such as
 * pause_all_vcpus() re-check the vCPU states.
 */
1203 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1204 {
1205 g_assert(qemu_cpu_is_self(cpu));
1206 cpu->stop = false;
1207 cpu->stopped = true;
1208 if (exit) {
1209 cpu_exit(cpu);
1210 }
1211 qemu_cond_broadcast(&qemu_pause_cond);
1212 }
1213
/*
 * Housekeeping done by a vCPU thread after it has been woken up: clear the
 * thread_kicked flag, honour a pending stop request and run the work items
 * queued for @cpu.
 */
1214 static void qemu_wait_io_event_common(CPUState *cpu)
1215 {
1216 atomic_mb_set(&cpu->thread_kicked, false);
1217 if (cpu->stop) {
1218 qemu_cpu_stop(cpu, false);
1219 }
1220 process_queued_cpu_work(cpu);
1221 }
1222
1223 static void qemu_tcg_rr_wait_io_event(void)
1224 {
1225 CPUState *cpu;
1226
1227 while (all_cpu_threads_idle()) {
1228 stop_tcg_kick_timer();
1229 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1230 }
1231
1232 start_tcg_kick_timer();
1233
1234 CPU_FOREACH(cpu) {
1235 qemu_wait_io_event_common(cpu);
1236 }
1237 }
1238
/*
 * Wait step of a per-vCPU thread: sleep on @cpu's halt condition (releasing
 * the global mutex while waiting) for as long as the thread is idle, then
 * run the common wake-up housekeeping.
 */
1239 static void qemu_wait_io_event(CPUState *cpu)
1240 {
1241 while (cpu_thread_is_idle(cpu)) {
1242 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1243 }
1244
1245 #ifdef _WIN32
1246 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1247 if (!tcg_enabled()) {
/* An alertable zero-length sleep lets the queued APC run. */
1248 SleepEx(0, TRUE);
1249 }
1250 #endif
1251 qemu_wait_io_event_common(cpu);
1252 }
1253
/*
 * Thread function of a KVM vCPU (started by qemu_kvm_start_vcpu()).
 *
 * @arg is the CPUState of the vCPU.  The thread initialises the KVM vCPU
 * (exiting the process on failure), announces its creation through
 * cpu->created / qemu_cpu_cond and then alternates between executing the
 * guest and waiting for events until the CPU is unplugged and can no longer
 * run.  On exit the vCPU is destroyed and cpu->created is cleared again.
 *
 * Always returns NULL.
 */
1254 static void *qemu_kvm_cpu_thread_fn(void *arg)
1255 {
1256 CPUState *cpu = arg;
1257 int r;
1258
1259 rcu_register_thread();
1260
/* The iothread mutex is held from here until just before the thread exits. */
1261 qemu_mutex_lock_iothread();
1262 qemu_thread_get_self(cpu->thread);
1263 cpu->thread_id = qemu_get_thread_id();
1264 cpu->can_do_io = 1;
1265 current_cpu = cpu;
1266
1267 r = kvm_init_vcpu(cpu);
1268 if (r < 0) {
1269 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1270 exit(1);
1271 }
1272
1273 kvm_init_cpu_signals(cpu);
1274
1275 /* signal CPU creation */
1276 cpu->created = true;
1277 qemu_cond_signal(&qemu_cpu_cond);
1278
1279 do {
1280 if (cpu_can_run(cpu)) {
1281 r = kvm_cpu_exec(cpu);
1282 if (r == EXCP_DEBUG) {
1283 cpu_handle_guest_debug(cpu);
1284 }
1285 }
1286 qemu_wait_io_event(cpu);
/* Leave only once the CPU is unplugged and is no longer runnable. */
1287 } while (!cpu->unplug || cpu_can_run(cpu));
1288
1289 qemu_kvm_destroy_vcpu(cpu);
1290 cpu->created = false;
1291 qemu_cond_signal(&qemu_cpu_cond);
1292 qemu_mutex_unlock_iothread();
1293 rcu_unregister_thread();
1294 return NULL;
1295 }
1296
1297 static void *qemu_dummy_cpu_thread_fn(void *arg)
1298 {
1299 #ifdef _WIN32
1300 error_report("qtest is not supported under Windows");
1301 exit(1);
1302 #else
1303 CPUState *cpu = arg;
1304 sigset_t waitset;
1305 int r;
1306
1307 rcu_register_thread();
1308
1309 qemu_mutex_lock_iothread();
1310 qemu_thread_get_self(cpu->thread);
1311 cpu->thread_id = qemu_get_thread_id();
1312 cpu->can_do_io = 1;
1313 current_cpu = cpu;
1314
1315 sigemptyset(&waitset);
1316 sigaddset(&waitset, SIG_IPI);
1317
1318 /* signal CPU creation */
1319 cpu->created = true;
1320 qemu_cond_signal(&qemu_cpu_cond);
1321
1322 do {
1323 qemu_mutex_unlock_iothread();
1324 do {
1325 int sig;
1326 r = sigwait(&waitset, &sig);
1327 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1328 if (r == -1) {
1329 perror("sigwait");
1330 exit(1);
1331 }
1332 qemu_mutex_lock_iothread();
1333 qemu_wait_io_event(cpu);
1334 } while (!cpu->unplug);
1335
1336 qemu_mutex_unlock_iothread();
1337 rcu_unregister_thread();
1338 return NULL;
1339 #endif
1340 }
1341
1342 static int64_t tcg_get_icount_limit(void)
1343 {
1344 int64_t deadline;
1345
1346 if (replay_mode != REPLAY_MODE_PLAY) {
1347 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1348
1349 /* Maintain prior (possibly buggy) behaviour where if no deadline
1350 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1351 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1352 * nanoseconds.
1353 */
1354 if ((deadline < 0) || (deadline > INT32_MAX)) {
1355 deadline = INT32_MAX;
1356 }
1357
1358 return qemu_icount_round(deadline);
1359 } else {
1360 return replay_get_instructions();
1361 }
1362 }
1363
1364 static void handle_icount_deadline(void)
1365 {
1366 assert(qemu_in_vcpu_thread());
1367 if (use_icount) {
1368 int64_t deadline =
1369 qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1370
1371 if (deadline == 0) {
1372 /* Wake up other AioContexts. */
1373 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1374 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1375 }
1376 }
1377 }
1378
1379 static void prepare_icount_for_run(CPUState *cpu)
1380 {
1381 if (use_icount) {
1382 int insns_left;
1383
1384 /* These should always be cleared by process_icount_data after
1385 * each vCPU execution. However u16.high can be raised
1386 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1387 */
1388 g_assert(cpu->icount_decr.u16.low == 0);
1389 g_assert(cpu->icount_extra == 0);
1390
1391 cpu->icount_budget = tcg_get_icount_limit();
1392 insns_left = MIN(0xffff, cpu->icount_budget);
1393 cpu->icount_decr.u16.low = insns_left;
1394 cpu->icount_extra = cpu->icount_budget - insns_left;
1395
1396 replay_mutex_lock();
1397 }
1398 }
1399
1400 static void process_icount_data(CPUState *cpu)
1401 {
1402 if (use_icount) {
1403 /* Account for executed instructions */
1404 cpu_update_icount(cpu);
1405
1406 /* Reset the counters */
1407 cpu->icount_decr.u16.low = 0;
1408 cpu->icount_extra = 0;
1409 cpu->icount_budget = 0;
1410
1411 replay_account_executed_instructions();
1412
1413 replay_mutex_unlock();
1414 }
1415 }
1416
1417
1418 static int tcg_cpu_exec(CPUState *cpu)
1419 {
1420 int ret;
1421 #ifdef CONFIG_PROFILER
1422 int64_t ti;
1423 #endif
1424
1425 assert(tcg_enabled());
1426 #ifdef CONFIG_PROFILER
1427 ti = profile_getclock();
1428 #endif
1429 cpu_exec_start(cpu);
1430 ret = cpu_exec(cpu);
1431 cpu_exec_end(cpu);
1432 #ifdef CONFIG_PROFILER
1433 atomic_set(&tcg_ctx->prof.cpu_exec_time,
1434 tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
1435 #endif
1436 return ret;
1437 }
1438
1439 /* Destroy any remaining vCPUs which have been unplugged and have
1440 * finished running
1441 */
1442 static void deal_with_unplugged_cpus(void)
1443 {
1444 CPUState *cpu;
1445
1446 CPU_FOREACH(cpu) {
1447 if (cpu->unplug && !cpu_can_run(cpu)) {
1448 qemu_tcg_destroy_vcpu(cpu);
1449 cpu->created = false;
1450 qemu_cond_signal(&qemu_cpu_cond);
1451 break;
1452 }
1453 }
1454 }
1455
1456 /* Single-threaded TCG
1457 *
1458 * In the single-threaded case each vCPU is simulated in turn. If
1459 * there is more than a single vCPU we create a simple timer to kick
1460 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1461 * This is done explicitly rather than relying on side-effects
1462 * elsewhere.
1463 */
1464
/*
 * Thread function of the single thread that runs all vCPUs in turn
 * (round-robin TCG).  @arg is the CPUState the thread was created for.
 *
 * After announcing creation, the thread waits until first_cpu is no longer
 * stopped and then loops forever: run pending timers, execute each runnable
 * CPU in list order until one has queued work or an exit request, then wait
 * for events and clean up unplugged CPUs.
 */
1465 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1466 {
1467 CPUState *cpu = arg;
1468
1469 assert(tcg_enabled());
1470 rcu_register_thread();
1471 tcg_register_thread();
1472
1473 qemu_mutex_lock_iothread();
1474 qemu_thread_get_self(cpu->thread);
1475
1476 cpu->thread_id = qemu_get_thread_id();
1477 cpu->created = true;
1478 cpu->can_do_io = 1;
1479 qemu_cond_signal(&qemu_cpu_cond);
1480
1481 /* wait for initial kick-off after machine start */
1482 while (first_cpu->stopped) {
1483 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1484
1485 /* process any pending work */
1486 CPU_FOREACH(cpu) {
1487 current_cpu = cpu;
1488 qemu_wait_io_event_common(cpu);
1489 }
1490 }
1491
1492 start_tcg_kick_timer();
1493
/* The loop above reused 'cpu' as its iterator; restart from the first CPU. */
1494 cpu = first_cpu;
1495
1496 /* process any pending work */
1497 cpu->exit_request = 1;
1498
1499 while (1) {
/* Drop the iothread mutex so it is re-taken only after the replay mutex. */
1500 qemu_mutex_unlock_iothread();
1501 replay_mutex_lock();
1502 qemu_mutex_lock_iothread();
1503 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1504 qemu_account_warp_timer();
1505
1506 /* Run the timers here. This is much more efficient than
1507 * waking up the I/O thread and waiting for completion.
1508 */
1509 handle_icount_deadline();
1510
1511 replay_mutex_unlock();
1512
/* cpu is NULL once the previous pass ran off the end of the CPU list. */
1513 if (!cpu) {
1514 cpu = first_cpu;
1515 }
1516
1517 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1518
1519 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1520 current_cpu = cpu;
1521
1522 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1523 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1524
1525 if (cpu_can_run(cpu)) {
1526 int r;
1527
/* Guest code is executed without holding the iothread mutex. */
1528 qemu_mutex_unlock_iothread();
1529 prepare_icount_for_run(cpu);
1530
1531 r = tcg_cpu_exec(cpu);
1532
1533 process_icount_data(cpu);
1534 qemu_mutex_lock_iothread();
1535
1536 if (r == EXCP_DEBUG) {
1537 cpu_handle_guest_debug(cpu);
1538 break;
1539 } else if (r == EXCP_ATOMIC) {
1540 qemu_mutex_unlock_iothread();
1541 cpu_exec_step_atomic(cpu);
1542 qemu_mutex_lock_iothread();
1543 break;
1544 }
1545 } else if (cpu->stop) {
/* For an unplugged CPU, advance first so the next pass starts after it. */
1546 if (cpu->unplug) {
1547 cpu = CPU_NEXT(cpu);
1548 }
1549 break;
1550 }
1551
1552 cpu = CPU_NEXT(cpu);
1553 } /* while (cpu && !cpu->exit_request).. */
1554
1555 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1556 atomic_set(&tcg_current_rr_cpu, NULL);
1557
1558 if (cpu && cpu->exit_request) {
1559 atomic_mb_set(&cpu->exit_request, 0);
1560 }
1561
1562 if (use_icount && all_cpu_threads_idle()) {
1563 /*
1564 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1565 * in the main_loop, wake it up in order to start the warp timer.
1566 */
1567 qemu_notify_event();
1568 }
1569
1570 qemu_tcg_rr_wait_io_event();
1571 deal_with_unplugged_cpus();
1572 }
1573
/* Not reached: the loop above has no exit path. */
1574 rcu_unregister_thread();
1575 return NULL;
1576 }
1577
1578 static void *qemu_hax_cpu_thread_fn(void *arg)
1579 {
1580 CPUState *cpu = arg;
1581 int r;
1582
1583 rcu_register_thread();
1584 qemu_mutex_lock_iothread();
1585 qemu_thread_get_self(cpu->thread);
1586
1587 cpu->thread_id = qemu_get_thread_id();
1588 cpu->created = true;
1589 cpu->halted = 0;
1590 current_cpu = cpu;
1591
1592 hax_init_vcpu(cpu);
1593 qemu_cond_signal(&qemu_cpu_cond);
1594
1595 do {
1596 if (cpu_can_run(cpu)) {
1597 r = hax_smp_cpu_exec(cpu);
1598 if (r == EXCP_DEBUG) {
1599 cpu_handle_guest_debug(cpu);
1600 }
1601 }
1602
1603 qemu_wait_io_event(cpu);
1604 } while (!cpu->unplug || cpu_can_run(cpu));
1605 rcu_unregister_thread();
1606 return NULL;
1607 }
1608
1609 /* The HVF-specific vCPU thread function. This one should only run when the host
1610 * CPU supports the VMX "unrestricted guest" feature. */
/*
 * @arg is the CPUState of the vCPU.  The thread initialises the HVF vCPU,
 * announces its creation through cpu->created / qemu_cpu_cond and then
 * alternates between executing the guest and waiting for events until the
 * CPU is unplugged and can no longer run; the vCPU is then destroyed.
 * Always returns NULL.
 */
1611 static void *qemu_hvf_cpu_thread_fn(void *arg)
1612 {
1613 CPUState *cpu = arg;
1614
1615 int r;
1616
1617 assert(hvf_enabled());
1618
1619 rcu_register_thread();
1620
/* The iothread mutex is held from here until just before the thread exits. */
1621 qemu_mutex_lock_iothread();
1622 qemu_thread_get_self(cpu->thread);
1623
1624 cpu->thread_id = qemu_get_thread_id();
1625 cpu->can_do_io = 1;
1626 current_cpu = cpu;
1627
1628 hvf_init_vcpu(cpu);
1629
1630 /* signal CPU creation */
1631 cpu->created = true;
1632 qemu_cond_signal(&qemu_cpu_cond);
1633
1634 do {
1635 if (cpu_can_run(cpu)) {
1636 r = hvf_vcpu_exec(cpu);
1637 if (r == EXCP_DEBUG) {
1638 cpu_handle_guest_debug(cpu);
1639 }
1640 }
1641 qemu_wait_io_event(cpu);
/* Leave only once the CPU is unplugged and is no longer runnable. */
1642 } while (!cpu->unplug || cpu_can_run(cpu));
1643
1644 hvf_vcpu_destroy(cpu);
1645 cpu->created = false;
1646 qemu_cond_signal(&qemu_cpu_cond);
1647 qemu_mutex_unlock_iothread();
1648 rcu_unregister_thread();
1649 return NULL;
1650 }
1651
/*
 * Thread function of a WHPX vCPU (started by qemu_whpx_start_vcpu()).
 *
 * @arg is the CPUState of the vCPU.  The thread initialises the WHPX vCPU
 * (exiting the process on failure), announces its creation through
 * cpu->created / qemu_cpu_cond and then alternates between executing the
 * guest and waiting for events until the CPU is unplugged and can no longer
 * run; the vCPU is then destroyed.  Always returns NULL.
 */
1652 static void *qemu_whpx_cpu_thread_fn(void *arg)
1653 {
1654 CPUState *cpu = arg;
1655 int r;
1656
1657 rcu_register_thread();
1658
/* The iothread mutex is held from here until just before the thread exits. */
1659 qemu_mutex_lock_iothread();
1660 qemu_thread_get_self(cpu->thread);
1661 cpu->thread_id = qemu_get_thread_id();
1662 current_cpu = cpu;
1663
1664 r = whpx_init_vcpu(cpu);
1665 if (r < 0) {
1666 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1667 exit(1);
1668 }
1669
1670 /* signal CPU creation */
1671 cpu->created = true;
1672 qemu_cond_signal(&qemu_cpu_cond);
1673
1674 do {
1675 if (cpu_can_run(cpu)) {
1676 r = whpx_vcpu_exec(cpu);
1677 if (r == EXCP_DEBUG) {
1678 cpu_handle_guest_debug(cpu);
1679 }
1680 }
/*
 * Open-coded wait: same steps as qemu_wait_io_event() minus its
 * Windows-only SleepEx() call.
 */
1681 while (cpu_thread_is_idle(cpu)) {
1682 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1683 }
1684 qemu_wait_io_event_common(cpu);
1685 } while (!cpu->unplug || cpu_can_run(cpu));
1686
1687 whpx_destroy_vcpu(cpu);
1688 cpu->created = false;
1689 qemu_cond_signal(&qemu_cpu_cond);
1690 qemu_mutex_unlock_iothread();
1691 rcu_unregister_thread();
1692 return NULL;
1693 }
1694
1695 #ifdef _WIN32
/*
 * Empty APC routine queued by qemu_cpu_kick_thread() with QueueUserAPC();
 * only the queuing matters, the routine itself does nothing.
 */
1696 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1697 {
1698 }
1699 #endif
1700
1701 /* Multi-threaded TCG
1702 *
1703 * In the multi-threaded case each vCPU has its own thread. The TLS
1704 * variable current_cpu can be used deep in the code to find the
1705 * current CPUState for a given thread.
1706 */
1707
/*
 * Thread function of one vCPU in multi-threaded TCG mode (started by
 * qemu_tcg_init_vcpu()).  Must not be used with icount (asserted).
 *
 * @arg is the CPUState of the vCPU.  After announcing creation, the thread
 * alternates between executing the guest without the iothread mutex and
 * waiting for events, until the CPU is unplugged and can no longer run.
 * Always returns NULL.
 */
1708 static void *qemu_tcg_cpu_thread_fn(void *arg)
1709 {
1710 CPUState *cpu = arg;
1711
1712 assert(tcg_enabled());
1713 g_assert(!use_icount);
1714
1715 rcu_register_thread();
1716 tcg_register_thread();
1717
1718 qemu_mutex_lock_iothread();
1719 qemu_thread_get_self(cpu->thread);
1720
1721 cpu->thread_id = qemu_get_thread_id();
1722 cpu->created = true;
1723 cpu->can_do_io = 1;
1724 current_cpu = cpu;
1725 qemu_cond_signal(&qemu_cpu_cond);
1726
1727 /* process any pending work */
1728 cpu->exit_request = 1;
1729
1730 do {
1731 if (cpu_can_run(cpu)) {
1732 int r;
/* Guest code is executed without holding the iothread mutex. */
1733 qemu_mutex_unlock_iothread();
1734 r = tcg_cpu_exec(cpu);
1735 qemu_mutex_lock_iothread();
1736 switch (r) {
1737 case EXCP_DEBUG:
1738 cpu_handle_guest_debug(cpu);
1739 break;
1740 case EXCP_HALTED:
1741 /* during start-up the vCPU is reset and the thread is
1742 * kicked several times. If we don't ensure we go back
1743 * to sleep in the halted state we won't cleanly
1744 * start-up when the vCPU is enabled.
1745 *
1746 * cpu->halted should ensure we sleep in wait_io_event
1747 */
1748 g_assert(cpu->halted);
1749 break;
1750 case EXCP_ATOMIC:
1751 qemu_mutex_unlock_iothread();
1752 cpu_exec_step_atomic(cpu);
1753 qemu_mutex_lock_iothread();
/* fall through */
1754 default:
1755 /* Ignore everything else? */
1756 break;
1757 }
1758 }
1759
1760 atomic_mb_set(&cpu->exit_request, 0);
1761 qemu_wait_io_event(cpu);
/* Leave only once the CPU is unplugged and is no longer runnable. */
1762 } while (!cpu->unplug || cpu_can_run(cpu));
1763
1764 qemu_tcg_destroy_vcpu(cpu);
1765 cpu->created = false;
1766 qemu_cond_signal(&qemu_cpu_cond);
1767 qemu_mutex_unlock_iothread();
1768 rcu_unregister_thread();
1769 return NULL;
1770 }
1771
1772 static void qemu_cpu_kick_thread(CPUState *cpu)
1773 {
1774 #ifndef _WIN32
1775 int err;
1776
1777 if (cpu->thread_kicked) {
1778 return;
1779 }
1780 cpu->thread_kicked = true;
1781 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1782 if (err && err != ESRCH) {
1783 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1784 exit(1);
1785 }
1786 #else /* _WIN32 */
1787 if (!qemu_cpu_is_self(cpu)) {
1788 if (whpx_enabled()) {
1789 whpx_vcpu_kick(cpu);
1790 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1791 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1792 __func__, GetLastError());
1793 exit(1);
1794 }
1795 }
1796 #endif
1797 }
1798
1799 void qemu_cpu_kick(CPUState *cpu)
1800 {
1801 qemu_cond_broadcast(cpu->halt_cond);
1802 if (tcg_enabled()) {
1803 cpu_exit(cpu);
1804 /* NOP unless doing single-thread RR */
1805 qemu_cpu_kick_rr_cpu();
1806 } else {
1807 if (hax_enabled()) {
1808 /*
1809 * FIXME: race condition with the exit_request check in
1810 * hax_vcpu_hax_exec
1811 */
1812 cpu->exit_request = 1;
1813 }
1814 qemu_cpu_kick_thread(cpu);
1815 }
1816 }
1817
/*
 * Kick the host thread of the calling vCPU.  current_cpu must be set
 * (asserted), i.e. this may only be called from a vCPU thread.
 */
1818 void qemu_cpu_kick_self(void)
1819 {
1820 assert(current_cpu);
1821 qemu_cpu_kick_thread(current_cpu);
1822 }
1823
/* Return true when the calling thread is the thread recorded in cpu->thread. */
1824 bool qemu_cpu_is_self(CPUState *cpu)
1825 {
1826 return qemu_thread_is_self(cpu->thread);
1827 }
1828
1829 bool qemu_in_vcpu_thread(void)
1830 {
1831 return current_cpu && qemu_cpu_is_self(current_cpu);
1832 }
1833
/*
 * Per-thread flag: set by qemu_mutex_lock_iothread_impl() and cleared by
 * qemu_mutex_unlock_iothread() in the calling thread.
 */
1834 static __thread bool iothread_locked = false;
1835
/* Return whether the calling thread currently holds the iothread mutex. */
1836 bool qemu_mutex_iothread_locked(void)
1837 {
1838 return iothread_locked;
1839 }
1840
1841 /*
1842 * The BQL is taken from so many places that it is worth profiling the
1843 * callers directly, instead of funneling them all through a single function.
1844 */
/*
 * Take the iothread mutex on behalf of the caller at @file:@line, using the
 * lock function currently stored in qemu_bql_mutex_lock_func.  The calling
 * thread must not already hold the mutex (asserted).
 */
1845 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1846 {
1847 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1848
1849 g_assert(!qemu_mutex_iothread_locked());
1850 bql_lock(&qemu_global_mutex, file, line);
1851 iothread_locked = true;
1852 }
1853
/*
 * Release the iothread mutex.  The calling thread must hold it (asserted).
 * The per-thread flag is cleared before the mutex itself is released.
 */
1854 void qemu_mutex_unlock_iothread(void)
1855 {
1856 g_assert(qemu_mutex_iothread_locked());
1857 iothread_locked = false;
1858 qemu_mutex_unlock(&qemu_global_mutex);
1859 }
1860
1861 static bool all_vcpus_paused(void)
1862 {
1863 CPUState *cpu;
1864
1865 CPU_FOREACH(cpu) {
1866 if (!cpu->stopped) {
1867 return false;
1868 }
1869 }
1870
1871 return true;
1872 }
1873
/*
 * Stop every vCPU and wait until all of them report 'stopped'.
 *
 * QEMU_CLOCK_VIRTUAL is disabled first.  A CPU whose thread is the caller is
 * stopped directly; every other CPU gets a stop request and a kick.  The
 * function then waits on qemu_pause_cond, kicking all CPUs again after each
 * wake-up, until all_vcpus_paused() holds.
 */
1874 void pause_all_vcpus(void)
1875 {
1876 CPUState *cpu;
1877
1878 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1879 CPU_FOREACH(cpu) {
1880 if (qemu_cpu_is_self(cpu)) {
1881 qemu_cpu_stop(cpu, true);
1882 } else {
1883 cpu->stop = true;
1884 qemu_cpu_kick(cpu);
1885 }
1886 }
1887
1888 /* We need to drop the replay_lock so any vCPU threads woken up
1889 * can finish their replay tasks
1890 */
1891 replay_mutex_unlock();
1892
1893 while (!all_vcpus_paused()) {
1894 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1895 CPU_FOREACH(cpu) {
1896 qemu_cpu_kick(cpu);
1897 }
1898 }
1899
/* Re-take the replay mutex with the iothread mutex dropped, then re-lock. */
1900 qemu_mutex_unlock_iothread();
1901 replay_mutex_lock();
1902 qemu_mutex_lock_iothread();
1903 }
1904
/*
 * Let @cpu run again: clear both the stop request and the stopped state,
 * then kick the CPU so its thread notices the change.
 */
1905 void cpu_resume(CPUState *cpu)
1906 {
1907 cpu->stop = false;
1908 cpu->stopped = false;
1909 qemu_cpu_kick(cpu);
1910 }
1911
1912 void resume_all_vcpus(void)
1913 {
1914 CPUState *cpu;
1915
1916 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1917 CPU_FOREACH(cpu) {
1918 cpu_resume(cpu);
1919 }
1920 }
1921
/*
 * Request removal of @cpu and wait for its thread to terminate.
 *
 * The stop and unplug flags are set and the CPU is kicked; the thread is
 * then joined with the iothread mutex released, and the mutex is re-taken
 * before returning.
 */
1922 void cpu_remove_sync(CPUState *cpu)
1923 {
1924 cpu->stop = true;
1925 cpu->unplug = true;
1926 qemu_cpu_kick(cpu);
/* The vCPU thread loops in this file run with this mutex held, so drop it for the join. */
1927 qemu_mutex_unlock_iothread();
1928 qemu_thread_join(cpu->thread);
1929 qemu_mutex_lock_iothread();
1930 }
1931
1932 /* For temporary buffers for forming a name */
1933 #define VCPU_THREAD_NAME_SIZE 16
1934
1935 static void qemu_tcg_init_vcpu(CPUState *cpu)
1936 {
1937 char thread_name[VCPU_THREAD_NAME_SIZE];
1938 static QemuCond *single_tcg_halt_cond;
1939 static QemuThread *single_tcg_cpu_thread;
1940 static int tcg_region_inited;
1941
1942 assert(tcg_enabled());
1943 /*
1944 * Initialize TCG regions--once. Now is a good time, because:
1945 * (1) TCG's init context, prologue and target globals have been set up.
1946 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1947 * -accel flag is processed, so the check doesn't work then).
1948 */
1949 if (!tcg_region_inited) {
1950 tcg_region_inited = 1;
1951 tcg_region_init();
1952 }
1953
1954 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1955 cpu->thread = g_malloc0(sizeof(QemuThread));
1956 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1957 qemu_cond_init(cpu->halt_cond);
1958
1959 if (qemu_tcg_mttcg_enabled()) {
1960 /* create a thread per vCPU with TCG (MTTCG) */
1961 parallel_cpus = true;
1962 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1963 cpu->cpu_index);
1964
1965 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1966 cpu, QEMU_THREAD_JOINABLE);
1967
1968 } else {
1969 /* share a single thread for all cpus with TCG */
1970 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1971 qemu_thread_create(cpu->thread, thread_name,
1972 qemu_tcg_rr_cpu_thread_fn,
1973 cpu, QEMU_THREAD_JOINABLE);
1974
1975 single_tcg_halt_cond = cpu->halt_cond;
1976 single_tcg_cpu_thread = cpu->thread;
1977 }
1978 #ifdef _WIN32
1979 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1980 #endif
1981 } else {
1982 /* For non-MTTCG cases we share the thread */
1983 cpu->thread = single_tcg_cpu_thread;
1984 cpu->halt_cond = single_tcg_halt_cond;
1985 cpu->thread_id = first_cpu->thread_id;
1986 cpu->can_do_io = 1;
1987 cpu->created = true;
1988 }
1989 }
1990
1991 static void qemu_hax_start_vcpu(CPUState *cpu)
1992 {
1993 char thread_name[VCPU_THREAD_NAME_SIZE];
1994
1995 cpu->thread = g_malloc0(sizeof(QemuThread));
1996 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1997 qemu_cond_init(cpu->halt_cond);
1998
1999 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
2000 cpu->cpu_index);
2001 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
2002 cpu, QEMU_THREAD_JOINABLE);
2003 #ifdef _WIN32
2004 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2005 #endif
2006 }
2007
2008 static void qemu_kvm_start_vcpu(CPUState *cpu)
2009 {
2010 char thread_name[VCPU_THREAD_NAME_SIZE];
2011
2012 cpu->thread = g_malloc0(sizeof(QemuThread));
2013 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2014 qemu_cond_init(cpu->halt_cond);
2015 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2016 cpu->cpu_index);
2017 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2018 cpu, QEMU_THREAD_JOINABLE);
2019 }
2020
2021 static void qemu_hvf_start_vcpu(CPUState *cpu)
2022 {
2023 char thread_name[VCPU_THREAD_NAME_SIZE];
2024
2025 /* HVF currently does not support TCG, and only runs in
2026 * unrestricted-guest mode. */
2027 assert(hvf_enabled());
2028
2029 cpu->thread = g_malloc0(sizeof(QemuThread));
2030 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2031 qemu_cond_init(cpu->halt_cond);
2032
2033 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2034 cpu->cpu_index);
2035 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2036 cpu, QEMU_THREAD_JOINABLE);
2037 }
2038
2039 static void qemu_whpx_start_vcpu(CPUState *cpu)
2040 {
2041 char thread_name[VCPU_THREAD_NAME_SIZE];
2042
2043 cpu->thread = g_malloc0(sizeof(QemuThread));
2044 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2045 qemu_cond_init(cpu->halt_cond);
2046 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2047 cpu->cpu_index);
2048 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2049 cpu, QEMU_THREAD_JOINABLE);
2050 #ifdef _WIN32
2051 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2052 #endif
2053 }
2054
2055 static void qemu_dummy_start_vcpu(CPUState *cpu)
2056 {
2057 char thread_name[VCPU_THREAD_NAME_SIZE];
2058
2059 cpu->thread = g_malloc0(sizeof(QemuThread));
2060 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2061 qemu_cond_init(cpu->halt_cond);
2062 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2063 cpu->cpu_index);
2064 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2065 QEMU_THREAD_JOINABLE);
2066 }
2067
/*
 * Finish the generic initialisation of @cpu and start its execution thread.
 *
 * The core/thread counts are copied from the SMP configuration, the CPU
 * starts out stopped, and a default address space is created if the target
 * did not set one up.  The thread is started by the first enabled
 * accelerator in the order KVM, HAX, HVF, TCG, WHPX, with the dummy thread
 * as fallback.  The function returns once cpu->created has been set.
 */
2068 void qemu_init_vcpu(CPUState *cpu)
2069 {
2070 cpu->nr_cores = smp_cores;
2071 cpu->nr_threads = smp_threads;
2072 cpu->stopped = true;
2073
2074 if (!cpu->as) {
2075 /* If the target cpu hasn't set up any address spaces itself,
2076 * give it the default one.
2077 */
2078 cpu->num_ases = 1;
2079 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2080 }
2081
2082 if (kvm_enabled()) {
2083 qemu_kvm_start_vcpu(cpu);
2084 } else if (hax_enabled()) {
2085 qemu_hax_start_vcpu(cpu);
2086 } else if (hvf_enabled()) {
2087 qemu_hvf_start_vcpu(cpu);
2088 } else if (tcg_enabled()) {
2089 qemu_tcg_init_vcpu(cpu);
2090 } else if (whpx_enabled()) {
2091 qemu_whpx_start_vcpu(cpu);
2092 } else {
2093 qemu_dummy_start_vcpu(cpu);
2094 }
2095
/* The vCPU thread sets cpu->created and signals qemu_cpu_cond. */
2096 while (!cpu->created) {
2097 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2098 }
2099 }
2100
2101 void cpu_stop_current(void)
2102 {
2103 if (current_cpu) {
2104 current_cpu->stop = true;
2105 cpu_exit(current_cpu);
2106 }
2107 }
2108
2109 int vm_stop(RunState state)
2110 {
2111 if (qemu_in_vcpu_thread()) {
2112 qemu_system_vmstop_request_prepare();
2113 qemu_system_vmstop_request(state);
2114 /*
2115 * FIXME: should not return to device code in case
2116 * vm_stop() has been requested.
2117 */
2118 cpu_stop_current();
2119 return 0;
2120 }
2121
2122 return do_vm_stop(state, true);
2123 }
2124
2125 /**
2126 * Prepare for (re)starting the VM.
2127 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2128 * running or in case of an error condition), 0 otherwise.
2129 */
2130 int vm_prepare_start(void)
2131 {
2132 RunState requested;
2133
2134 qemu_vmstop_requested(&requested);
2135 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2136 return -1;
2137 }
2138
2139 /* Ensure that a STOP/RESUME pair of events is emitted if a
2140 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2141 * example, according to documentation is always followed by
2142 * the STOP event.
2143 */
2144 if (runstate_is_running()) {
2145 qapi_event_send_stop();
2146 qapi_event_send_resume();
2147 return -1;
2148 }
2149
2150 /* We are sending this now, but the CPUs will be resumed shortly later */
2151 qapi_event_send_resume();
2152
2153 replay_enable_events();
2154 cpu_enable_ticks();
2155 runstate_set(RUN_STATE_RUNNING);
2156 vm_state_notify(1, RUN_STATE_RUNNING);
2157 return 0;
2158 }
2159
/* Start the VM: resume all vCPUs if vm_prepare_start() returns 0. */
void vm_start(void)
{
    if (vm_prepare_start() != 0) {
        return;
    }
    resume_all_vcpus();
}
2166
2167 /* does a state transition even if the VM is already stopped,
2168 current state is forgotten forever */
2169 int vm_stop_force_state(RunState state)
2170 {
2171 if (runstate_is_running()) {
2172 return vm_stop(state);
2173 } else {
2174 runstate_set(state);
2175
2176 bdrv_drain_all();
2177 /* Make sure to return an error if the flush in a previous vm_stop()
2178 * failed. */
2179 return bdrv_flush_all();
2180 }
2181 }
2182
/*
 * Print the supported CPU models to @f through @cpu_fprintf by invoking the
 * target's cpu_list macro.  Prints nothing on targets that do not define
 * cpu_list.  @optarg is not used.
 */
2183 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
2184 {
2185 /* XXX: implement xxx_cpu_list for targets that still miss it */
2186 #if defined(cpu_list)
2187 cpu_list(f, cpu_fprintf);
2188 #endif
2189 }
2190
/*
 * Build the list of CpuInfo entries, one per CPU in CPU_FOREACH order.
 *
 * Each CPU's state is synchronised first (cpu_synchronize_state()), then the
 * generic fields and the target-specific program counter / state are filled
 * in.  @errp is not written by this function.
 *
 * Returns the head of a newly allocated list (NULL if there are no CPUs).
 */
2191 CpuInfoList *qmp_query_cpus(Error **errp)
2192 {
2193 MachineState *ms = MACHINE(qdev_get_machine());
2194 MachineClass *mc = MACHINE_GET_CLASS(ms);
2195 CpuInfoList *head = NULL, *cur_item = NULL;
2196 CPUState *cpu;
2197
2198 CPU_FOREACH(cpu) {
2199 CpuInfoList *info;
/* Pick the target-specific register state, where one is needed below. */
2200 #if defined(TARGET_I386)
2201 X86CPU *x86_cpu = X86_CPU(cpu);
2202 CPUX86State *env = &x86_cpu->env;
2203 #elif defined(TARGET_PPC)
2204 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
2205 CPUPPCState *env = &ppc_cpu->env;
2206 #elif defined(TARGET_SPARC)
2207 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
2208 CPUSPARCState *env = &sparc_cpu->env;
2209 #elif defined(TARGET_RISCV)
2210 RISCVCPU *riscv_cpu = RISCV_CPU(cpu);
2211 CPURISCVState *env = &riscv_cpu->env;
2212 #elif defined(TARGET_MIPS)
2213 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2214 CPUMIPSState *env = &mips_cpu->env;
2215 #elif defined(TARGET_TRICORE)
2216 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2217 CPUTriCoreState *env = &tricore_cpu->env;
2218 #elif defined(TARGET_S390X)
2219 S390CPU *s390_cpu = S390_CPU(cpu);
2220 CPUS390XState *env = &s390_cpu->env;
2221 #endif
2222
2223 cpu_synchronize_state(cpu);
2224
2225 info = g_malloc0(sizeof(*info));
2226 info->value = g_malloc0(sizeof(*info->value));
2227 info->value->CPU = cpu->cpu_index;
2228 info->value->current = (cpu == first_cpu);
2229 info->value->halted = cpu->halted;
2230 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2231 info->value->thread_id = cpu->thread_id;
2232 #if defined(TARGET_I386)
2233 info->value->arch = CPU_INFO_ARCH_X86;
/* Reported pc is eip plus the CS segment base. */
2234 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
2235 #elif defined(TARGET_PPC)
2236 info->value->arch = CPU_INFO_ARCH_PPC;
2237 info->value->u.ppc.nip = env->nip;
2238 #elif defined(TARGET_SPARC)
2239 info->value->arch = CPU_INFO_ARCH_SPARC;
2240 info->value->u.q_sparc.pc = env->pc;
2241 info->value->u.q_sparc.npc = env->npc;
2242 #elif defined(TARGET_MIPS)
2243 info->value->arch = CPU_INFO_ARCH_MIPS;
2244 info->value->u.q_mips.PC = env->active_tc.PC;
2245 #elif defined(TARGET_TRICORE)
2246 info->value->arch = CPU_INFO_ARCH_TRICORE;
2247 info->value->u.tricore.PC = env->PC;
2248 #elif defined(TARGET_S390X)
2249 info->value->arch = CPU_INFO_ARCH_S390;
2250 info->value->u.s390.cpu_state = env->cpu_state;
2251 #elif defined(TARGET_RISCV)
2252 info->value->arch = CPU_INFO_ARCH_RISCV;
2253 info->value->u.riscv.pc = env->pc;
2254 #else
2255 info->value->arch = CPU_INFO_ARCH_OTHER;
2256 #endif
/* Instance properties are only reported when the machine class provides the hook. */
2257 info->value->has_props = !!mc->cpu_index_to_instance_props;
2258 if (info->value->has_props) {
2259 CpuInstanceProperties *props;
2260 props = g_malloc0(sizeof(*props));
2261 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2262 info->value->props = props;
2263 }
2264
2265 /* XXX: waiting for the qapi to support GSList */
/* Append to the tail so the list keeps CPU_FOREACH order. */
2266 if (!cur_item) {
2267 head = cur_item = info;
2268 } else {
2269 cur_item->next = info;
2270 cur_item = info;
2271 }
2272 }
2273
2274 return head;
2275 }
2276
2277 static CpuInfoArch sysemu_target_to_cpuinfo_arch(SysEmuTarget target)
2278 {
2279 /*
2280 * The @SysEmuTarget -> @CpuInfoArch mapping below is based on the
2281 * TARGET_ARCH -> TARGET_BASE_ARCH mapping in the "configure" script.
2282 */
2283 switch (target) {
2284 case SYS_EMU_TARGET_I386:
2285 case SYS_EMU_TARGET_X86_64:
2286 return CPU_INFO_ARCH_X86;
2287
2288 case SYS_EMU_TARGET_PPC:
2289 case SYS_EMU_TARGET_PPC64:
2290 return CPU_INFO_ARCH_PPC;
2291
2292 case SYS_EMU_TARGET_SPARC:
2293 case SYS_EMU_TARGET_SPARC64:
2294 return CPU_INFO_ARCH_SPARC;
2295
2296 case SYS_EMU_TARGET_MIPS:
2297 case SYS_EMU_TARGET_MIPSEL:
2298 case SYS_EMU_TARGET_MIPS64:
2299 case SYS_EMU_TARGET_MIPS64EL:
2300 return CPU_INFO_ARCH_MIPS;
2301
2302 case SYS_EMU_TARGET_TRICORE:
2303 return CPU_INFO_ARCH_TRICORE;
2304
2305 case SYS_EMU_TARGET_S390X:
2306 return CPU_INFO_ARCH_S390;
2307
2308 case SYS_EMU_TARGET_RISCV32:
2309 case SYS_EMU_TARGET_RISCV64:
2310 return CPU_INFO_ARCH_RISCV;
2311
2312 default:
2313 return CPU_INFO_ARCH_OTHER;
2314 }
2315 }
2316
2317 static void cpustate_to_cpuinfo_s390(CpuInfoS390 *info, const CPUState *cpu)
2318 {
2319 #ifdef TARGET_S390X
2320 S390CPU *s390_cpu = S390_CPU(cpu);
2321 CPUS390XState *env = &s390_cpu->env;
2322
2323 info->cpu_state = env->cpu_state;
2324 #else
2325 abort();
2326 #endif
2327 }
2328
2329 /*
2330 * fast means: we NEVER interrupt vCPU threads to retrieve
2331 * information from KVM.
2332 */
2333 CpuInfoFastList *qmp_query_cpus_fast(Error **errp)
2334 {
2335 MachineState *ms = MACHINE(qdev_get_machine());
2336 MachineClass *mc = MACHINE_GET_CLASS(ms);
2337 CpuInfoFastList *head = NULL, *cur_item = NULL;
2338 SysEmuTarget target = qapi_enum_parse(&SysEmuTarget_lookup, TARGET_NAME,
2339 -1, &error_abort);
2340 CPUState *cpu;
2341
2342 CPU_FOREACH(cpu) {
2343 CpuInfoFastList *info = g_malloc0(sizeof(*info));
2344 info->value = g_malloc0(sizeof(*info->value));
2345
2346 info->value->cpu_index = cpu->cpu_index;
2347 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2348 info->value->thread_id = cpu->thread_id;
2349
2350 info->value->has_props = !!mc->cpu_index_to_instance_props;
2351 if (info->value->has_props) {
2352 CpuInstanceProperties *props;
2353 props = g_malloc0(sizeof(*props));
2354 *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2355 info->value->props = props;
2356 }
2357
2358 info->value->arch = sysemu_target_to_cpuinfo_arch(target);
2359 info->value->target = target;
2360 if (target == SYS_EMU_TARGET_S390X) {
2361 cpustate_to_cpuinfo_s390(&info->value->u.s390x, cpu);
2362 }
2363
2364 if (!cur_item) {
2365 head = cur_item = info;
2366 } else {
2367 cur_item->next = info;
2368 cur_item = info;
2369 }
2370 }
2371
2372 return head;
2373 }
2374
2375 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2376 bool has_cpu, int64_t cpu_index, Error **errp)
2377 {
2378 FILE *f;
2379 uint32_t l;
2380 CPUState *cpu;
2381 uint8_t buf[1024];
2382 int64_t orig_addr = addr, orig_size = size;
2383
2384 if (!has_cpu) {
2385 cpu_index = 0;
2386 }
2387
2388 cpu = qemu_get_cpu(cpu_index);
2389 if (cpu == NULL) {
2390 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2391 "a CPU number");
2392 return;
2393 }
2394
2395 f = fopen(filename, "wb");
2396 if (!f) {
2397 error_setg_file_open(errp, errno, filename);
2398 return;
2399 }
2400
2401 while (size != 0) {
2402 l = sizeof(buf);
2403 if (l > size)
2404 l = size;
2405 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2406 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2407 " specified", orig_addr, orig_size);
2408 goto exit;
2409 }
2410 if (fwrite(buf, 1, l, f) != l) {
2411 error_setg(errp, QERR_IO_ERROR);
2412 goto exit;
2413 }
2414 addr += l;
2415 size -= l;
2416 }
2417
2418 exit:
2419 fclose(f);
2420 }
2421
2422 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2423 Error **errp)
2424 {
2425 FILE *f;
2426 uint32_t l;
2427 uint8_t buf[1024];
2428
2429 f = fopen(filename, "wb");
2430 if (!f) {
2431 error_setg_file_open(errp, errno, filename);
2432 return;
2433 }
2434
2435 while (size != 0) {
2436 l = sizeof(buf);
2437 if (l > size)
2438 l = size;
2439 cpu_physical_memory_read(addr, buf, l);
2440 if (fwrite(buf, 1, l, f) != l) {
2441 error_setg(errp, QERR_IO_ERROR);
2442 goto exit;
2443 }
2444 addr += l;
2445 size -= l;
2446 }
2447
2448 exit:
2449 fclose(f);
2450 }
2451
2452 void qmp_inject_nmi(Error **errp)
2453 {
2454 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2455 }
2456
2457 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2458 {
2459 if (!use_icount) {
2460 return;
2461 }
2462
2463 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
2464 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2465 if (icount_align_option) {
2466 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
2467 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
2468 } else {
2469 cpu_fprintf(f, "Max guest delay NA\n");
2470 cpu_fprintf(f, "Max guest advance NA\n");
2471 }
2472 }