1 /*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu/osdep.h"
26 #include "qemu-common.h"
27 #include "qemu/config-file.h"
28 #include "qemu/cutils.h"
29 #include "migration/vmstate.h"
30 #include "monitor/monitor.h"
31 #include "qapi/error.h"
32 #include "qapi/qapi-commands-misc.h"
33 #include "qapi/qapi-events-run-state.h"
34 #include "qapi/qmp/qerror.h"
35 #include "qemu/error-report.h"
36 #include "qemu/qemu-print.h"
37 #include "sysemu/tcg.h"
38 #include "sysemu/block-backend.h"
39 #include "exec/gdbstub.h"
40 #include "sysemu/dma.h"
41 #include "sysemu/hw_accel.h"
42 #include "sysemu/kvm.h"
43 #include "sysemu/hax.h"
44 #include "sysemu/hvf.h"
45 #include "sysemu/whpx.h"
46 #include "exec/exec-all.h"
47
48 #include "qemu/thread.h"
49 #include "qemu/plugin.h"
50 #include "sysemu/cpus.h"
51 #include "sysemu/qtest.h"
52 #include "qemu/main-loop.h"
53 #include "qemu/option.h"
54 #include "qemu/bitmap.h"
55 #include "qemu/seqlock.h"
56 #include "qemu/guest-random.h"
57 #include "tcg/tcg.h"
58 #include "hw/nmi.h"
59 #include "sysemu/replay.h"
60 #include "sysemu/runstate.h"
61 #include "hw/boards.h"
62 #include "hw/hw.h"
63
64 #ifdef CONFIG_LINUX
65
66 #include <sys/prctl.h>
67
68 #ifndef PR_MCE_KILL
69 #define PR_MCE_KILL 33
70 #endif
71
72 #ifndef PR_MCE_KILL_SET
73 #define PR_MCE_KILL_SET 1
74 #endif
75
76 #ifndef PR_MCE_KILL_EARLY
77 #define PR_MCE_KILL_EARLY 1
78 #endif
79
80 #endif /* CONFIG_LINUX */
81
82 static QemuMutex qemu_global_mutex;
83
84 int64_t max_delay;
85 int64_t max_advance;
86
87 /* vcpu throttling controls */
88 static QEMUTimer *throttle_timer;
89 static unsigned int throttle_percentage;
90
91 #define CPU_THROTTLE_PCT_MIN 1
92 #define CPU_THROTTLE_PCT_MAX 99
93 #define CPU_THROTTLE_TIMESLICE_NS 10000000
94
95 bool cpu_is_stopped(CPUState *cpu)
96 {
97 return cpu->stopped || !runstate_is_running();
98 }
99
100 static bool cpu_thread_is_idle(CPUState *cpu)
101 {
102 if (cpu->stop || cpu->queued_work_first) {
103 return false;
104 }
105 if (cpu_is_stopped(cpu)) {
106 return true;
107 }
108 if (!cpu->halted || cpu_has_work(cpu) ||
109 kvm_halt_in_kernel()) {
110 return false;
111 }
112 return true;
113 }
114
115 static bool all_cpu_threads_idle(void)
116 {
117 CPUState *cpu;
118
119 CPU_FOREACH(cpu) {
120 if (!cpu_thread_is_idle(cpu)) {
121 return false;
122 }
123 }
124 return true;
125 }
126
127 /***********************************************************/
128 /* guest cycle counter */
129
130 /* Protected by TimersState seqlock */
131
132 static bool icount_sleep = true;
133 /* Arbitrarily pick 1 MIPS as the minimum allowable speed. */
134 #define MAX_ICOUNT_SHIFT 10
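/*
 * Worked example (illustrative): icount_time_shift is the power-of-two
 * number of virtual nanoseconds charged per executed instruction, so
 * shift == 10 means 2^10 = 1024 ns per instruction, i.e. roughly one
 * instruction per microsecond (~1 MIPS), while the default shift of 3
 * chosen in configure_icount() below means 8 ns per instruction
 * (~125 MIPS).
 */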
135
136 typedef struct TimersState {
137 /* Protected by BQL. */
138 int64_t cpu_ticks_prev;
139 int64_t cpu_ticks_offset;
140
141 /* Protect fields that can be read outside the BQL, and
142 * written from multiple threads.
143 */
144 QemuSeqLock vm_clock_seqlock;
145 QemuSpin vm_clock_lock;
146
147 int16_t cpu_ticks_enabled;
148
149 /* Conversion factor from emulated instructions to virtual clock ticks. */
150 int16_t icount_time_shift;
151
152 /* Compensate for varying guest execution speed. */
153 int64_t qemu_icount_bias;
154
155 int64_t vm_clock_warp_start;
156 int64_t cpu_clock_offset;
157
158 /* Only written by TCG thread */
159 int64_t qemu_icount;
160
161 /* for adjusting icount */
162 QEMUTimer *icount_rt_timer;
163 QEMUTimer *icount_vm_timer;
164 QEMUTimer *icount_warp_timer;
165 } TimersState;
166
167 static TimersState timers_state;
168 bool mttcg_enabled;
169
170
171 /* The current number of executed instructions is based on what we
172 * originally budgeted minus the current state of the decrementing
173 * icount counters in extra/u16.low.
174 */
175 static int64_t cpu_get_icount_executed(CPUState *cpu)
176 {
177 return (cpu->icount_budget -
178 (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
179 }
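/*
 * Illustrative arithmetic (not part of the original code): if a run was
 * budgeted 70000 instructions, prepare_icount_for_run() below splits that
 * into icount_decr.u16.low = 65535 and icount_extra = 4465.  After 30000
 * instructions have executed, u16.low has counted down to 35535, so
 * executed = 70000 - (35535 + 4465) = 30000.
 */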
180
181 /*
182 * Update the global shared timer_state.qemu_icount to take into
183 * account executed instructions. This is done by the TCG vCPU
184 * thread so the main-loop can see time has moved forward.
185 */
186 static void cpu_update_icount_locked(CPUState *cpu)
187 {
188 int64_t executed = cpu_get_icount_executed(cpu);
189 cpu->icount_budget -= executed;
190
191 atomic_set_i64(&timers_state.qemu_icount,
192 timers_state.qemu_icount + executed);
193 }
194
195 /*
196 * Update the global shared timer_state.qemu_icount to take into
197 * account executed instructions. This is done by the TCG vCPU
198 * thread so the main-loop can see time has moved forward.
199 */
200 void cpu_update_icount(CPUState *cpu)
201 {
202 seqlock_write_lock(&timers_state.vm_clock_seqlock,
203 &timers_state.vm_clock_lock);
204 cpu_update_icount_locked(cpu);
205 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
206 &timers_state.vm_clock_lock);
207 }
208
209 static int64_t cpu_get_icount_raw_locked(void)
210 {
211 CPUState *cpu = current_cpu;
212
213 if (cpu && cpu->running) {
214 if (!cpu->can_do_io) {
215 error_report("Bad icount read");
216 exit(1);
217 }
218 /* Take into account what has run */
219 cpu_update_icount_locked(cpu);
220 }
221 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
222 return atomic_read_i64(&timers_state.qemu_icount);
223 }
224
225 static int64_t cpu_get_icount_locked(void)
226 {
227 int64_t icount = cpu_get_icount_raw_locked();
228 return atomic_read_i64(&timers_state.qemu_icount_bias) +
229 cpu_icount_to_ns(icount);
230 }
231
232 int64_t cpu_get_icount_raw(void)
233 {
234 int64_t icount;
235 unsigned start;
236
237 do {
238 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
239 icount = cpu_get_icount_raw_locked();
240 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
241
242 return icount;
243 }
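/*
 * Note on the locking pattern used by the readers above and below: a
 * seqlock reader never blocks.  It records the sequence number with
 * seqlock_read_begin(), reads the protected fields, and repeats the whole
 * read if seqlock_read_retry() reports that a writer (which holds
 * vm_clock_lock and bumps the sequence) ran in the meantime.
 */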
244
245 /* Return the virtual CPU time, based on the instruction counter. */
246 int64_t cpu_get_icount(void)
247 {
248 int64_t icount;
249 unsigned start;
250
251 do {
252 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
253 icount = cpu_get_icount_locked();
254 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
255
256 return icount;
257 }
258
259 int64_t cpu_icount_to_ns(int64_t icount)
260 {
261 return icount << atomic_read(&timers_state.icount_time_shift);
262 }
263
264 static int64_t cpu_get_ticks_locked(void)
265 {
266 int64_t ticks = timers_state.cpu_ticks_offset;
267 if (timers_state.cpu_ticks_enabled) {
268 ticks += cpu_get_host_ticks();
269 }
270
271 if (timers_state.cpu_ticks_prev > ticks) {
272 /* Non-increasing ticks may happen if the host uses software suspend. */
273 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
274 ticks = timers_state.cpu_ticks_prev;
275 }
276
277 timers_state.cpu_ticks_prev = ticks;
278 return ticks;
279 }
280
281 /* return the time elapsed in VM between vm_start and vm_stop. Unless
282 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
283 * counter.
284 */
285 int64_t cpu_get_ticks(void)
286 {
287 int64_t ticks;
288
289 if (use_icount) {
290 return cpu_get_icount();
291 }
292
293 qemu_spin_lock(&timers_state.vm_clock_lock);
294 ticks = cpu_get_ticks_locked();
295 qemu_spin_unlock(&timers_state.vm_clock_lock);
296 return ticks;
297 }
298
299 static int64_t cpu_get_clock_locked(void)
300 {
301 int64_t time;
302
303 time = timers_state.cpu_clock_offset;
304 if (timers_state.cpu_ticks_enabled) {
305 time += get_clock();
306 }
307
308 return time;
309 }
310
311 /* Return the monotonic time elapsed in VM, i.e.,
312 * the time between vm_start and vm_stop
313 */
314 int64_t cpu_get_clock(void)
315 {
316 int64_t ti;
317 unsigned start;
318
319 do {
320 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
321 ti = cpu_get_clock_locked();
322 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
323
324 return ti;
325 }
326
327 /* enable cpu_get_ticks()
328 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
329 */
330 void cpu_enable_ticks(void)
331 {
332 seqlock_write_lock(&timers_state.vm_clock_seqlock,
333 &timers_state.vm_clock_lock);
334 if (!timers_state.cpu_ticks_enabled) {
335 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
336 timers_state.cpu_clock_offset -= get_clock();
337 timers_state.cpu_ticks_enabled = 1;
338 }
339 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
340 &timers_state.vm_clock_lock);
341 }
342
343 /* disable cpu_get_ticks(): the clock is stopped. You must not call
344 * cpu_get_ticks() after that.
345 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
346 */
347 void cpu_disable_ticks(void)
348 {
349 seqlock_write_lock(&timers_state.vm_clock_seqlock,
350 &timers_state.vm_clock_lock);
351 if (timers_state.cpu_ticks_enabled) {
352 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
353 timers_state.cpu_clock_offset = cpu_get_clock_locked();
354 timers_state.cpu_ticks_enabled = 0;
355 }
356 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
357 &timers_state.vm_clock_lock);
358 }
359
360 /* Correlation between real and virtual time is always going to be
361 fairly approximate, so ignore small variation.
362 When the guest is idle, real and virtual time will be aligned in
363 the IO wait loop. */
364 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
365
366 static void icount_adjust(void)
367 {
368 int64_t cur_time;
369 int64_t cur_icount;
370 int64_t delta;
371
372 /* Protected by TimersState mutex. */
373 static int64_t last_delta;
374
375 /* If the VM is not running, then do nothing. */
376 if (!runstate_is_running()) {
377 return;
378 }
379
380 seqlock_write_lock(&timers_state.vm_clock_seqlock,
381 &timers_state.vm_clock_lock);
382 cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
383 cpu_get_clock_locked());
384 cur_icount = cpu_get_icount_locked();
385
386 delta = cur_icount - cur_time;
387 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
388 if (delta > 0
389 && last_delta + ICOUNT_WOBBLE < delta * 2
390 && timers_state.icount_time_shift > 0) {
391 /* The guest is getting too far ahead. Slow time down. */
392 atomic_set(&timers_state.icount_time_shift,
393 timers_state.icount_time_shift - 1);
394 }
395 if (delta < 0
396 && last_delta - ICOUNT_WOBBLE > delta * 2
397 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
398 /* The guest is getting too far behind. Speed time up. */
399 atomic_set(&timers_state.icount_time_shift,
400 timers_state.icount_time_shift + 1);
401 }
402 last_delta = delta;
403 atomic_set_i64(&timers_state.qemu_icount_bias,
404 cur_icount - (timers_state.qemu_icount
405 << timers_state.icount_time_shift));
406 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
407 &timers_state.vm_clock_lock);
408 }
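/*
 * Worked example (illustrative, the precise condition is in the code
 * above): when the guest's virtual clock keeps drifting ahead of real
 * time by more than ICOUNT_WOBBLE (100 ms), icount_time_shift drops by
 * one, e.g. from 4 to 3, halving the nanoseconds charged per instruction
 * from 16 to 8 and so slowing virtual time down.  qemu_icount_bias is
 * recomputed at the same moment so that cpu_get_icount_locked() stays
 * continuous across the shift change.
 */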
409
410 static void icount_adjust_rt(void *opaque)
411 {
412 timer_mod(timers_state.icount_rt_timer,
413 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
414 icount_adjust();
415 }
416
417 static void icount_adjust_vm(void *opaque)
418 {
419 timer_mod(timers_state.icount_vm_timer,
420 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
421 NANOSECONDS_PER_SECOND / 10);
422 icount_adjust();
423 }
424
425 static int64_t qemu_icount_round(int64_t count)
426 {
427 int shift = atomic_read(&timers_state.icount_time_shift);
428 return (count + (1 << shift) - 1) >> shift;
429 }
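/*
 * Illustrative: this rounds a nanosecond deadline up to a whole number of
 * instructions.  With icount_time_shift == 3 (8 ns per instruction) a
 * 100 ns deadline becomes (100 + 7) >> 3 = 13 instructions, which covers
 * 104 ns >= 100 ns.
 */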
430
431 static void icount_warp_rt(void)
432 {
433 unsigned seq;
434 int64_t warp_start;
435
436 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
437 * changes from -1 to another value, so the race here is okay.
438 */
439 do {
440 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
441 warp_start = timers_state.vm_clock_warp_start;
442 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
443
444 if (warp_start == -1) {
445 return;
446 }
447
448 seqlock_write_lock(&timers_state.vm_clock_seqlock,
449 &timers_state.vm_clock_lock);
450 if (runstate_is_running()) {
451 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
452 cpu_get_clock_locked());
453 int64_t warp_delta;
454
455 warp_delta = clock - timers_state.vm_clock_warp_start;
456 if (use_icount == 2) {
457 /*
458 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
459 * far ahead of real time.
460 */
461 int64_t cur_icount = cpu_get_icount_locked();
462 int64_t delta = clock - cur_icount;
463 warp_delta = MIN(warp_delta, delta);
464 }
465 atomic_set_i64(&timers_state.qemu_icount_bias,
466 timers_state.qemu_icount_bias + warp_delta);
467 }
468 timers_state.vm_clock_warp_start = -1;
469 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
470 &timers_state.vm_clock_lock);
471
472 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
473 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
474 }
475 }
476
477 static void icount_timer_cb(void *opaque)
478 {
479 /* No need for a checkpoint because the timer already synchronizes
480 * with CHECKPOINT_CLOCK_VIRTUAL_RT.
481 */
482 icount_warp_rt();
483 }
484
485 void qtest_clock_warp(int64_t dest)
486 {
487 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
488 AioContext *aio_context;
489 assert(qtest_enabled());
490 aio_context = qemu_get_aio_context();
491 while (clock < dest) {
492 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
493 QEMU_TIMER_ATTR_ALL);
494 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
495
496 seqlock_write_lock(&timers_state.vm_clock_seqlock,
497 &timers_state.vm_clock_lock);
498 atomic_set_i64(&timers_state.qemu_icount_bias,
499 timers_state.qemu_icount_bias + warp);
500 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
501 &timers_state.vm_clock_lock);
502
503 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
504 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
505 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
506 }
507 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
508 }
509
510 void qemu_start_warp_timer(void)
511 {
512 int64_t clock;
513 int64_t deadline;
514
515 if (!use_icount) {
516 return;
517 }
518
519 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
520 * do not fire, so computing the deadline does not make sense.
521 */
522 if (!runstate_is_running()) {
523 return;
524 }
525
526 if (replay_mode != REPLAY_MODE_PLAY) {
527 if (!all_cpu_threads_idle()) {
528 return;
529 }
530
531 if (qtest_enabled()) {
532 /* When testing, qtest commands advance icount. */
533 return;
534 }
535
536 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
537 } else {
538 /* warp clock deterministically in record/replay mode */
539 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
540 /* The vCPU is sleeping and the warp can't be started.
541 It is probably a race condition: the notification sent
542 to the vCPU was processed in advance and the vCPU went to sleep.
543 Therefore we have to wake it up so it can do something. */
544 if (replay_has_checkpoint()) {
545 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
546 }
547 return;
548 }
549 }
550
551 /* We want to use the earliest deadline from ALL vm_clocks */
552 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
553 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
554 ~QEMU_TIMER_ATTR_EXTERNAL);
555 if (deadline < 0) {
556 static bool notified;
557 if (!icount_sleep && !notified) {
558 warn_report("icount sleep disabled and no active timers");
559 notified = true;
560 }
561 return;
562 }
563
564 if (deadline > 0) {
565 /*
566 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
567 * sleep. Otherwise, the CPU might be waiting for a future timer
568 * interrupt to wake it up, but the interrupt never comes because
569 * the vCPU isn't running any insns and thus doesn't advance the
570 * QEMU_CLOCK_VIRTUAL.
571 */
572 if (!icount_sleep) {
573 /*
574 * We never let VCPUs sleep in no sleep icount mode.
575 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
576 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
577 * It is useful when we want a deterministic execution time,
578 * isolated from host latencies.
579 */
580 seqlock_write_lock(&timers_state.vm_clock_seqlock,
581 &timers_state.vm_clock_lock);
582 atomic_set_i64(&timers_state.qemu_icount_bias,
583 timers_state.qemu_icount_bias + deadline);
584 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
585 &timers_state.vm_clock_lock);
586 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
587 } else {
588 /*
589 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
590 * "real" time, (related to the time left until the next event) has
591 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
592 * This prevents the warps from being visible externally; for example,
593 * you will not be sending network packets continuously instead of
594 * every 100ms.
595 */
596 seqlock_write_lock(&timers_state.vm_clock_seqlock,
597 &timers_state.vm_clock_lock);
598 if (timers_state.vm_clock_warp_start == -1
599 || timers_state.vm_clock_warp_start > clock) {
600 timers_state.vm_clock_warp_start = clock;
601 }
602 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
603 &timers_state.vm_clock_lock);
604 timer_mod_anticipate(timers_state.icount_warp_timer,
605 clock + deadline);
606 }
607 } else if (deadline == 0) {
608 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
609 }
610 }
611
612 static void qemu_account_warp_timer(void)
613 {
614 if (!use_icount || !icount_sleep) {
615 return;
616 }
617
618 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
619 * do not fire, so computing the deadline does not make sense.
620 */
621 if (!runstate_is_running()) {
622 return;
623 }
624
625 /* warp clock deterministically in record/replay mode */
626 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
627 return;
628 }
629
630 timer_del(timers_state.icount_warp_timer);
631 icount_warp_rt();
632 }
633
634 static bool icount_state_needed(void *opaque)
635 {
636 return use_icount;
637 }
638
639 static bool warp_timer_state_needed(void *opaque)
640 {
641 TimersState *s = opaque;
642 return s->icount_warp_timer != NULL;
643 }
644
645 static bool adjust_timers_state_needed(void *opaque)
646 {
647 TimersState *s = opaque;
648 return s->icount_rt_timer != NULL;
649 }
650
651 static bool shift_state_needed(void *opaque)
652 {
653 return use_icount == 2;
654 }
655
656 /*
657 * Subsection for warp timer migration is optional, because the timer may not be created
658 */
659 static const VMStateDescription icount_vmstate_warp_timer = {
660 .name = "timer/icount/warp_timer",
661 .version_id = 1,
662 .minimum_version_id = 1,
663 .needed = warp_timer_state_needed,
664 .fields = (VMStateField[]) {
665 VMSTATE_INT64(vm_clock_warp_start, TimersState),
666 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
667 VMSTATE_END_OF_LIST()
668 }
669 };
670
671 static const VMStateDescription icount_vmstate_adjust_timers = {
672 .name = "timer/icount/timers",
673 .version_id = 1,
674 .minimum_version_id = 1,
675 .needed = adjust_timers_state_needed,
676 .fields = (VMStateField[]) {
677 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
678 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
679 VMSTATE_END_OF_LIST()
680 }
681 };
682
683 static const VMStateDescription icount_vmstate_shift = {
684 .name = "timer/icount/shift",
685 .version_id = 1,
686 .minimum_version_id = 1,
687 .needed = shift_state_needed,
688 .fields = (VMStateField[]) {
689 VMSTATE_INT16(icount_time_shift, TimersState),
690 VMSTATE_END_OF_LIST()
691 }
692 };
693
694 /*
695 * This is a subsection for icount migration.
696 */
697 static const VMStateDescription icount_vmstate_timers = {
698 .name = "timer/icount",
699 .version_id = 1,
700 .minimum_version_id = 1,
701 .needed = icount_state_needed,
702 .fields = (VMStateField[]) {
703 VMSTATE_INT64(qemu_icount_bias, TimersState),
704 VMSTATE_INT64(qemu_icount, TimersState),
705 VMSTATE_END_OF_LIST()
706 },
707 .subsections = (const VMStateDescription*[]) {
708 &icount_vmstate_warp_timer,
709 &icount_vmstate_adjust_timers,
710 &icount_vmstate_shift,
711 NULL
712 }
713 };
714
715 static const VMStateDescription vmstate_timers = {
716 .name = "timer",
717 .version_id = 2,
718 .minimum_version_id = 1,
719 .fields = (VMStateField[]) {
720 VMSTATE_INT64(cpu_ticks_offset, TimersState),
721 VMSTATE_UNUSED(8),
722 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
723 VMSTATE_END_OF_LIST()
724 },
725 .subsections = (const VMStateDescription*[]) {
726 &icount_vmstate_timers,
727 NULL
728 }
729 };
730
731 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
732 {
733 double pct;
734 double throttle_ratio;
735 int64_t sleeptime_ns, endtime_ns;
736
737 if (!cpu_throttle_get_percentage()) {
738 return;
739 }
740
741 pct = (double)cpu_throttle_get_percentage()/100;
742 throttle_ratio = pct / (1 - pct);
743 /* Add 1ns to fix double's rounding error (like 0.9999999...) */
744 sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
745 endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
746 while (sleeptime_ns > 0 && !cpu->stop) {
747 if (sleeptime_ns > SCALE_MS) {
748 qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
749 sleeptime_ns / SCALE_MS);
750 } else {
751 qemu_mutex_unlock_iothread();
752 g_usleep(sleeptime_ns / SCALE_US);
753 qemu_mutex_lock_iothread();
754 }
755 sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
756 }
757 atomic_set(&cpu->throttle_thread_scheduled, 0);
758 }
759
760 static void cpu_throttle_timer_tick(void *opaque)
761 {
762 CPUState *cpu;
763 double pct;
764
765 /* Stop the timer if needed (by not re-arming it below) */
766 if (!cpu_throttle_get_percentage()) {
767 return;
768 }
769 CPU_FOREACH(cpu) {
770 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
771 async_run_on_cpu(cpu, cpu_throttle_thread,
772 RUN_ON_CPU_NULL);
773 }
774 }
775
776 pct = (double)cpu_throttle_get_percentage()/100;
777 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
778 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
779 }
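/*
 * Worked example of the throttle arithmetic above (illustrative): at a
 * 50% throttle, pct = 0.5, so throttle_ratio = 1 and each vCPU sleeps
 * about one 10 ms timeslice, while the timer re-arms every
 * 10 ms / (1 - 0.5) = 20 ms; the vCPU therefore runs ~10 ms out of every
 * 20 ms.  At the 99% maximum, the vCPU sleeps ~990 ms out of every
 * ~1000 ms.
 */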
780
781 void cpu_throttle_set(int new_throttle_pct)
782 {
783 /* Ensure throttle percentage is within valid range */
784 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
785 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
786
787 atomic_set(&throttle_percentage, new_throttle_pct);
788
789 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
790 CPU_THROTTLE_TIMESLICE_NS);
791 }
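/*
 * Hypothetical usage sketch (illustrative only, not part of QEMU): a
 * caller ramping throttling up while some long-running operation falls
 * behind, and turning it off again when done.
 */
#if 0
static void example_adjust_throttle(bool behind, int pct)
{
    if (behind) {
        cpu_throttle_set(pct);      /* clamped to [1, 99] internally */
    } else if (cpu_throttle_active()) {
        cpu_throttle_stop();
    }
}
#endif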
792
793 void cpu_throttle_stop(void)
794 {
795 atomic_set(&throttle_percentage, 0);
796 }
797
798 bool cpu_throttle_active(void)
799 {
800 return (cpu_throttle_get_percentage() != 0);
801 }
802
803 int cpu_throttle_get_percentage(void)
804 {
805 return atomic_read(&throttle_percentage);
806 }
807
808 void cpu_ticks_init(void)
809 {
810 seqlock_init(&timers_state.vm_clock_seqlock);
811 qemu_spin_init(&timers_state.vm_clock_lock);
812 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
813 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
814 cpu_throttle_timer_tick, NULL);
815 }
816
817 void configure_icount(QemuOpts *opts, Error **errp)
818 {
819 const char *option = qemu_opt_get(opts, "shift");
820 bool sleep = qemu_opt_get_bool(opts, "sleep", true);
821 bool align = qemu_opt_get_bool(opts, "align", false);
822 long time_shift = -1;
823
824 if (!option) {
825 if (qemu_opt_get(opts, "align") != NULL) {
826 error_setg(errp, "Please specify shift option when using align");
827 }
828 return;
829 }
830
831 if (align && !sleep) {
832 error_setg(errp, "align=on and sleep=off are incompatible");
833 return;
834 }
835
836 if (strcmp(option, "auto") != 0) {
837 if (qemu_strtol(option, NULL, 0, &time_shift) < 0
838 || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
839 error_setg(errp, "icount: Invalid shift value");
840 return;
841 }
842 } else if (icount_align_option) {
843 error_setg(errp, "shift=auto and align=on are incompatible");
844 return;
845 } else if (!icount_sleep) {
846 error_setg(errp, "shift=auto and sleep=off are incompatible");
847 return;
848 }
849
850 icount_sleep = sleep;
851 if (icount_sleep) {
852 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
853 icount_timer_cb, NULL);
854 }
855
856 icount_align_option = align;
857
858 if (time_shift >= 0) {
859 timers_state.icount_time_shift = time_shift;
860 use_icount = 1;
861 return;
862 }
863
864 use_icount = 2;
865
866 /* 125 MIPS seems a reasonable initial guess at the guest speed.
867 It will be corrected fairly quickly anyway. */
868 timers_state.icount_time_shift = 3;
869
870 /* Have both realtime and virtual time triggers for speed adjustment.
871 The realtime trigger catches emulated time passing too slowly,
872 the virtual time trigger catches emulated time passing too fast.
873 Realtime triggers occur even when idle, so use them less frequently
874 than VM triggers. */
875 timers_state.vm_clock_warp_start = -1;
876 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
877 icount_adjust_rt, NULL);
878 timer_mod(timers_state.icount_rt_timer,
879 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
880 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
881 icount_adjust_vm, NULL);
882 timer_mod(timers_state.icount_vm_timer,
883 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
884 NANOSECONDS_PER_SECOND / 10);
885 }
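/*
 * For reference, the option string parsed above corresponds to command
 * lines such as "-icount shift=7" (fixed 2^7 = 128 ns per instruction)
 * or "-icount shift=auto,sleep=off" (adaptive shift, vCPUs never sleep);
 * "align=on" additionally requires an explicit, non-auto shift value.
 */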
886
887 /***********************************************************/
888 /* TCG vCPU kick timer
889 *
890 * The kick timer is responsible for moving single threaded vCPU
891 * emulation on to the next vCPU. If more than one vCPU is running, a
892 * timer event will force a cpu->exit so the next vCPU can get
893 * scheduled.
894 *
895 * The timer is removed while all vCPUs are idle and restarted once
896 * a vCPU becomes runnable again.
897 */
898
899 static QEMUTimer *tcg_kick_vcpu_timer;
900 static CPUState *tcg_current_rr_cpu;
901
902 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
903
904 static inline int64_t qemu_tcg_next_kick(void)
905 {
906 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
907 }
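/*
 * TCG_KICK_PERIOD is 100 ms of virtual time, so in round-robin mode a
 * vCPU that never exits on its own is forcibly kicked roughly ten times
 * per second to let the next vCPU run.
 */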
908
909 /* Kick the currently round-robin scheduled vCPU to next */
910 static void qemu_cpu_kick_rr_next_cpu(void)
911 {
912 CPUState *cpu;
913 do {
914 cpu = atomic_mb_read(&tcg_current_rr_cpu);
915 if (cpu) {
916 cpu_exit(cpu);
917 }
918 } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
919 }
920
921 /* Kick all RR vCPUs */
922 static void qemu_cpu_kick_rr_cpus(void)
923 {
924 CPUState *cpu;
925
926 CPU_FOREACH(cpu) {
927 cpu_exit(cpu);
928 };
929 }
930
931 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
932 {
933 }
934
935 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
936 {
937 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
938 qemu_notify_event();
939 return;
940 }
941
942 if (qemu_in_vcpu_thread()) {
943 /* A CPU is currently running; kick it back out to the
944 * tcg_cpu_exec() loop so it will recalculate its
945 * icount deadline immediately.
946 */
947 qemu_cpu_kick(current_cpu);
948 } else if (first_cpu) {
949 /* qemu_cpu_kick is not enough to kick a halted CPU out of
950 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
951 * causes cpu_thread_is_idle to return false. This way,
952 * handle_icount_deadline can run.
953 * If we have no CPUs at all for some reason, we don't
954 * need to do anything.
955 */
956 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
957 }
958 }
959
960 static void kick_tcg_thread(void *opaque)
961 {
962 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
963 qemu_cpu_kick_rr_next_cpu();
964 }
965
966 static void start_tcg_kick_timer(void)
967 {
968 assert(!mttcg_enabled);
969 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
970 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
971 kick_tcg_thread, NULL);
972 }
973 if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
974 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
975 }
976 }
977
978 static void stop_tcg_kick_timer(void)
979 {
980 assert(!mttcg_enabled);
981 if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
982 timer_del(tcg_kick_vcpu_timer);
983 }
984 }
985
986 /***********************************************************/
987 void hw_error(const char *fmt, ...)
988 {
989 va_list ap;
990 CPUState *cpu;
991
992 va_start(ap, fmt);
993 fprintf(stderr, "qemu: hardware error: ");
994 vfprintf(stderr, fmt, ap);
995 fprintf(stderr, "\n");
996 CPU_FOREACH(cpu) {
997 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
998 cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
999 }
1000 va_end(ap);
1001 abort();
1002 }
1003
1004 void cpu_synchronize_all_states(void)
1005 {
1006 CPUState *cpu;
1007
1008 CPU_FOREACH(cpu) {
1009 cpu_synchronize_state(cpu);
1010 /* TODO: move to cpu_synchronize_state() */
1011 if (hvf_enabled()) {
1012 hvf_cpu_synchronize_state(cpu);
1013 }
1014 }
1015 }
1016
1017 void cpu_synchronize_all_post_reset(void)
1018 {
1019 CPUState *cpu;
1020
1021 CPU_FOREACH(cpu) {
1022 cpu_synchronize_post_reset(cpu);
1023 /* TODO: move to cpu_synchronize_post_reset() */
1024 if (hvf_enabled()) {
1025 hvf_cpu_synchronize_post_reset(cpu);
1026 }
1027 }
1028 }
1029
1030 void cpu_synchronize_all_post_init(void)
1031 {
1032 CPUState *cpu;
1033
1034 CPU_FOREACH(cpu) {
1035 cpu_synchronize_post_init(cpu);
1036 /* TODO: move to cpu_synchronize_post_init() */
1037 if (hvf_enabled()) {
1038 hvf_cpu_synchronize_post_init(cpu);
1039 }
1040 }
1041 }
1042
1043 void cpu_synchronize_all_pre_loadvm(void)
1044 {
1045 CPUState *cpu;
1046
1047 CPU_FOREACH(cpu) {
1048 cpu_synchronize_pre_loadvm(cpu);
1049 }
1050 }
1051
1052 static int do_vm_stop(RunState state, bool send_stop)
1053 {
1054 int ret = 0;
1055
1056 if (runstate_is_running()) {
1057 runstate_set(state);
1058 cpu_disable_ticks();
1059 pause_all_vcpus();
1060 vm_state_notify(0, state);
1061 if (send_stop) {
1062 qapi_event_send_stop();
1063 }
1064 }
1065
1066 bdrv_drain_all();
1067 ret = bdrv_flush_all();
1068
1069 return ret;
1070 }
1071
1072 /* Special vm_stop() variant for terminating the process. Historically clients
1073 * did not expect a QMP STOP event and so we need to retain compatibility.
1074 */
1075 int vm_shutdown(void)
1076 {
1077 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
1078 }
1079
1080 static bool cpu_can_run(CPUState *cpu)
1081 {
1082 if (cpu->stop) {
1083 return false;
1084 }
1085 if (cpu_is_stopped(cpu)) {
1086 return false;
1087 }
1088 return true;
1089 }
1090
1091 static void cpu_handle_guest_debug(CPUState *cpu)
1092 {
1093 gdb_set_stop_cpu(cpu);
1094 qemu_system_debug_request();
1095 cpu->stopped = true;
1096 }
1097
1098 #ifdef CONFIG_LINUX
1099 static void sigbus_reraise(void)
1100 {
1101 sigset_t set;
1102 struct sigaction action;
1103
1104 memset(&action, 0, sizeof(action));
1105 action.sa_handler = SIG_DFL;
1106 if (!sigaction(SIGBUS, &action, NULL)) {
1107 raise(SIGBUS);
1108 sigemptyset(&set);
1109 sigaddset(&set, SIGBUS);
1110 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
1111 }
1112 perror("Failed to re-raise SIGBUS!\n");
1113 abort();
1114 }
1115
1116 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1117 {
1118 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1119 sigbus_reraise();
1120 }
1121
1122 if (current_cpu) {
1123 /* Called asynchronously in VCPU thread. */
1124 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1125 sigbus_reraise();
1126 }
1127 } else {
1128 /* Called synchronously (via signalfd) in main thread. */
1129 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1130 sigbus_reraise();
1131 }
1132 }
1133 }
1134
1135 static void qemu_init_sigbus(void)
1136 {
1137 struct sigaction action;
1138
1139 memset(&action, 0, sizeof(action));
1140 action.sa_flags = SA_SIGINFO;
1141 action.sa_sigaction = sigbus_handler;
1142 sigaction(SIGBUS, &action, NULL);
1143
1144 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1145 }
1146 #else /* !CONFIG_LINUX */
1147 static void qemu_init_sigbus(void)
1148 {
1149 }
1150 #endif /* !CONFIG_LINUX */
1151
1152 static QemuThread io_thread;
1153
1154 /* cpu creation */
1155 static QemuCond qemu_cpu_cond;
1156 /* system init */
1157 static QemuCond qemu_pause_cond;
1158
1159 void qemu_init_cpu_loop(void)
1160 {
1161 qemu_init_sigbus();
1162 qemu_cond_init(&qemu_cpu_cond);
1163 qemu_cond_init(&qemu_pause_cond);
1164 qemu_mutex_init(&qemu_global_mutex);
1165
1166 qemu_thread_get_self(&io_thread);
1167 }
1168
1169 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1170 {
1171 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1172 }
1173
1174 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1175 {
1176 if (kvm_destroy_vcpu(cpu) < 0) {
1177 error_report("kvm_destroy_vcpu failed");
1178 exit(EXIT_FAILURE);
1179 }
1180 }
1181
1182 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1183 {
1184 }
1185
1186 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1187 {
1188 g_assert(qemu_cpu_is_self(cpu));
1189 cpu->stop = false;
1190 cpu->stopped = true;
1191 if (exit) {
1192 cpu_exit(cpu);
1193 }
1194 qemu_cond_broadcast(&qemu_pause_cond);
1195 }
1196
1197 static void qemu_wait_io_event_common(CPUState *cpu)
1198 {
1199 atomic_mb_set(&cpu->thread_kicked, false);
1200 if (cpu->stop) {
1201 qemu_cpu_stop(cpu, false);
1202 }
1203 process_queued_cpu_work(cpu);
1204 }
1205
1206 static void qemu_tcg_rr_wait_io_event(void)
1207 {
1208 CPUState *cpu;
1209
1210 while (all_cpu_threads_idle()) {
1211 stop_tcg_kick_timer();
1212 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1213 }
1214
1215 start_tcg_kick_timer();
1216
1217 CPU_FOREACH(cpu) {
1218 qemu_wait_io_event_common(cpu);
1219 }
1220 }
1221
1222 static void qemu_wait_io_event(CPUState *cpu)
1223 {
1224 bool slept = false;
1225
1226 while (cpu_thread_is_idle(cpu)) {
1227 if (!slept) {
1228 slept = true;
1229 qemu_plugin_vcpu_idle_cb(cpu);
1230 }
1231 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1232 }
1233 if (slept) {
1234 qemu_plugin_vcpu_resume_cb(cpu);
1235 }
1236
1237 #ifdef _WIN32
1238 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1239 if (!tcg_enabled()) {
1240 SleepEx(0, TRUE);
1241 }
1242 #endif
1243 qemu_wait_io_event_common(cpu);
1244 }
1245
1246 static void *qemu_kvm_cpu_thread_fn(void *arg)
1247 {
1248 CPUState *cpu = arg;
1249 int r;
1250
1251 rcu_register_thread();
1252
1253 qemu_mutex_lock_iothread();
1254 qemu_thread_get_self(cpu->thread);
1255 cpu->thread_id = qemu_get_thread_id();
1256 cpu->can_do_io = 1;
1257 current_cpu = cpu;
1258
1259 r = kvm_init_vcpu(cpu);
1260 if (r < 0) {
1261 error_report("kvm_init_vcpu failed: %s", strerror(-r));
1262 exit(1);
1263 }
1264
1265 kvm_init_cpu_signals(cpu);
1266
1267 /* signal CPU creation */
1268 cpu->created = true;
1269 qemu_cond_signal(&qemu_cpu_cond);
1270 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1271
1272 do {
1273 if (cpu_can_run(cpu)) {
1274 r = kvm_cpu_exec(cpu);
1275 if (r == EXCP_DEBUG) {
1276 cpu_handle_guest_debug(cpu);
1277 }
1278 }
1279 qemu_wait_io_event(cpu);
1280 } while (!cpu->unplug || cpu_can_run(cpu));
1281
1282 qemu_kvm_destroy_vcpu(cpu);
1283 cpu->created = false;
1284 qemu_cond_signal(&qemu_cpu_cond);
1285 qemu_mutex_unlock_iothread();
1286 rcu_unregister_thread();
1287 return NULL;
1288 }
1289
1290 static void *qemu_dummy_cpu_thread_fn(void *arg)
1291 {
1292 #ifdef _WIN32
1293 error_report("qtest is not supported under Windows");
1294 exit(1);
1295 #else
1296 CPUState *cpu = arg;
1297 sigset_t waitset;
1298 int r;
1299
1300 rcu_register_thread();
1301
1302 qemu_mutex_lock_iothread();
1303 qemu_thread_get_self(cpu->thread);
1304 cpu->thread_id = qemu_get_thread_id();
1305 cpu->can_do_io = 1;
1306 current_cpu = cpu;
1307
1308 sigemptyset(&waitset);
1309 sigaddset(&waitset, SIG_IPI);
1310
1311 /* signal CPU creation */
1312 cpu->created = true;
1313 qemu_cond_signal(&qemu_cpu_cond);
1314 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1315
1316 do {
1317 qemu_mutex_unlock_iothread();
1318 do {
1319 int sig;
1320 r = sigwait(&waitset, &sig);
1321 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1322 if (r == -1) {
1323 perror("sigwait");
1324 exit(1);
1325 }
1326 qemu_mutex_lock_iothread();
1327 qemu_wait_io_event(cpu);
1328 } while (!cpu->unplug);
1329
1330 qemu_mutex_unlock_iothread();
1331 rcu_unregister_thread();
1332 return NULL;
1333 #endif
1334 }
1335
1336 static int64_t tcg_get_icount_limit(void)
1337 {
1338 int64_t deadline;
1339
1340 if (replay_mode != REPLAY_MODE_PLAY) {
1341 /*
1342 * Include all the timers, because they may need attention.
1343 * Overly long CPU execution may cause unnecessary delays in the UI.
1344 */
1345 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1346 QEMU_TIMER_ATTR_ALL);
1347 /* Check realtime timers, because they help with input processing */
1348 deadline = qemu_soonest_timeout(deadline,
1349 qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
1350 QEMU_TIMER_ATTR_ALL));
1351
1352 /* Maintain prior (possibly buggy) behaviour where if no deadline
1353 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1354 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1355 * nanoseconds.
1356 */
1357 if ((deadline < 0) || (deadline > INT32_MAX)) {
1358 deadline = INT32_MAX;
1359 }
1360
1361 return qemu_icount_round(deadline);
1362 } else {
1363 return replay_get_instructions();
1364 }
1365 }
1366
1367 static void handle_icount_deadline(void)
1368 {
1369 assert(qemu_in_vcpu_thread());
1370 if (use_icount) {
1371 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1372 QEMU_TIMER_ATTR_ALL);
1373
1374 if (deadline == 0) {
1375 /* Wake up other AioContexts. */
1376 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1377 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1378 }
1379 }
1380 }
1381
1382 static void prepare_icount_for_run(CPUState *cpu)
1383 {
1384 if (use_icount) {
1385 int insns_left;
1386
1387 /* These should always be cleared by process_icount_data after
1388 * each vCPU execution. However, u16.high can be raised
1389 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1390 */
1391 g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
1392 g_assert(cpu->icount_extra == 0);
1393
1394 cpu->icount_budget = tcg_get_icount_limit();
1395 insns_left = MIN(0xffff, cpu->icount_budget);
1396 cpu_neg(cpu)->icount_decr.u16.low = insns_left;
1397 cpu->icount_extra = cpu->icount_budget - insns_left;
1398
1399 replay_mutex_lock();
1400 }
1401 }
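/*
 * Illustrative: a budget of 100000 instructions is split into
 * icount_decr.u16.low = 65535 (the 16-bit decrementer the translated
 * code actually counts down) and icount_extra = 34465; the two are
 * recombined by cpu_get_icount_executed() when accounting for what ran.
 */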
1402
1403 static void process_icount_data(CPUState *cpu)
1404 {
1405 if (use_icount) {
1406 /* Account for executed instructions */
1407 cpu_update_icount(cpu);
1408
1409 /* Reset the counters */
1410 cpu_neg(cpu)->icount_decr.u16.low = 0;
1411 cpu->icount_extra = 0;
1412 cpu->icount_budget = 0;
1413
1414 replay_account_executed_instructions();
1415
1416 replay_mutex_unlock();
1417 }
1418 }
1419
1420
1421 static int tcg_cpu_exec(CPUState *cpu)
1422 {
1423 int ret;
1424 #ifdef CONFIG_PROFILER
1425 int64_t ti;
1426 #endif
1427
1428 assert(tcg_enabled());
1429 #ifdef CONFIG_PROFILER
1430 ti = profile_getclock();
1431 #endif
1432 cpu_exec_start(cpu);
1433 ret = cpu_exec(cpu);
1434 cpu_exec_end(cpu);
1435 #ifdef CONFIG_PROFILER
1436 atomic_set(&tcg_ctx->prof.cpu_exec_time,
1437 tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
1438 #endif
1439 return ret;
1440 }
1441
1442 /* Destroy any remaining vCPUs which have been unplugged and have
1443 * finished running
1444 */
1445 static void deal_with_unplugged_cpus(void)
1446 {
1447 CPUState *cpu;
1448
1449 CPU_FOREACH(cpu) {
1450 if (cpu->unplug && !cpu_can_run(cpu)) {
1451 qemu_tcg_destroy_vcpu(cpu);
1452 cpu->created = false;
1453 qemu_cond_signal(&qemu_cpu_cond);
1454 break;
1455 }
1456 }
1457 }
1458
1459 /* Single-threaded TCG
1460 *
1461 * In the single-threaded case each vCPU is simulated in turn. If
1462 * there is more than a single vCPU we create a simple timer to kick
1463 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1464 * This is done explicitly rather than relying on side-effects
1465 * elsewhere.
1466 */
1467
1468 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1469 {
1470 CPUState *cpu = arg;
1471
1472 assert(tcg_enabled());
1473 rcu_register_thread();
1474 tcg_register_thread();
1475
1476 qemu_mutex_lock_iothread();
1477 qemu_thread_get_self(cpu->thread);
1478
1479 cpu->thread_id = qemu_get_thread_id();
1480 cpu->created = true;
1481 cpu->can_do_io = 1;
1482 qemu_cond_signal(&qemu_cpu_cond);
1483 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1484
1485 /* wait for initial kick-off after machine start */
1486 while (first_cpu->stopped) {
1487 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1488
1489 /* process any pending work */
1490 CPU_FOREACH(cpu) {
1491 current_cpu = cpu;
1492 qemu_wait_io_event_common(cpu);
1493 }
1494 }
1495
1496 start_tcg_kick_timer();
1497
1498 cpu = first_cpu;
1499
1500 /* process any pending work */
1501 cpu->exit_request = 1;
1502
1503 while (1) {
1504 qemu_mutex_unlock_iothread();
1505 replay_mutex_lock();
1506 qemu_mutex_lock_iothread();
1507 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1508 qemu_account_warp_timer();
1509
1510 /* Run the timers here. This is much more efficient than
1511 * waking up the I/O thread and waiting for completion.
1512 */
1513 handle_icount_deadline();
1514
1515 replay_mutex_unlock();
1516
1517 if (!cpu) {
1518 cpu = first_cpu;
1519 }
1520
1521 while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1522
1523 atomic_mb_set(&tcg_current_rr_cpu, cpu);
1524 current_cpu = cpu;
1525
1526 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1527 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1528
1529 if (cpu_can_run(cpu)) {
1530 int r;
1531
1532 qemu_mutex_unlock_iothread();
1533 prepare_icount_for_run(cpu);
1534
1535 r = tcg_cpu_exec(cpu);
1536
1537 process_icount_data(cpu);
1538 qemu_mutex_lock_iothread();
1539
1540 if (r == EXCP_DEBUG) {
1541 cpu_handle_guest_debug(cpu);
1542 break;
1543 } else if (r == EXCP_ATOMIC) {
1544 qemu_mutex_unlock_iothread();
1545 cpu_exec_step_atomic(cpu);
1546 qemu_mutex_lock_iothread();
1547 break;
1548 }
1549 } else if (cpu->stop) {
1550 if (cpu->unplug) {
1551 cpu = CPU_NEXT(cpu);
1552 }
1553 break;
1554 }
1555
1556 cpu = CPU_NEXT(cpu);
1557 } /* while (cpu && !cpu->exit_request).. */
1558
1559 /* Does not need atomic_mb_set because a spurious wakeup is okay. */
1560 atomic_set(&tcg_current_rr_cpu, NULL);
1561
1562 if (cpu && cpu->exit_request) {
1563 atomic_mb_set(&cpu->exit_request, 0);
1564 }
1565
1566 if (use_icount && all_cpu_threads_idle()) {
1567 /*
1568 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1569 * in the main_loop, wake it up in order to start the warp timer.
1570 */
1571 qemu_notify_event();
1572 }
1573
1574 qemu_tcg_rr_wait_io_event();
1575 deal_with_unplugged_cpus();
1576 }
1577
1578 rcu_unregister_thread();
1579 return NULL;
1580 }
1581
1582 static void *qemu_hax_cpu_thread_fn(void *arg)
1583 {
1584 CPUState *cpu = arg;
1585 int r;
1586
1587 rcu_register_thread();
1588 qemu_mutex_lock_iothread();
1589 qemu_thread_get_self(cpu->thread);
1590
1591 cpu->thread_id = qemu_get_thread_id();
1592 cpu->created = true;
1593 current_cpu = cpu;
1594
1595 hax_init_vcpu(cpu);
1596 qemu_cond_signal(&qemu_cpu_cond);
1597 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1598
1599 do {
1600 if (cpu_can_run(cpu)) {
1601 r = hax_smp_cpu_exec(cpu);
1602 if (r == EXCP_DEBUG) {
1603 cpu_handle_guest_debug(cpu);
1604 }
1605 }
1606
1607 qemu_wait_io_event(cpu);
1608 } while (!cpu->unplug || cpu_can_run(cpu));
1609 rcu_unregister_thread();
1610 return NULL;
1611 }
1612
1613 /* The HVF-specific vCPU thread function. This one should only run when the host
1614 * CPU supports the VMX "unrestricted guest" feature. */
1615 static void *qemu_hvf_cpu_thread_fn(void *arg)
1616 {
1617 CPUState *cpu = arg;
1618
1619 int r;
1620
1621 assert(hvf_enabled());
1622
1623 rcu_register_thread();
1624
1625 qemu_mutex_lock_iothread();
1626 qemu_thread_get_self(cpu->thread);
1627
1628 cpu->thread_id = qemu_get_thread_id();
1629 cpu->can_do_io = 1;
1630 current_cpu = cpu;
1631
1632 hvf_init_vcpu(cpu);
1633
1634 /* signal CPU creation */
1635 cpu->created = true;
1636 qemu_cond_signal(&qemu_cpu_cond);
1637 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1638
1639 do {
1640 if (cpu_can_run(cpu)) {
1641 r = hvf_vcpu_exec(cpu);
1642 if (r == EXCP_DEBUG) {
1643 cpu_handle_guest_debug(cpu);
1644 }
1645 }
1646 qemu_wait_io_event(cpu);
1647 } while (!cpu->unplug || cpu_can_run(cpu));
1648
1649 hvf_vcpu_destroy(cpu);
1650 cpu->created = false;
1651 qemu_cond_signal(&qemu_cpu_cond);
1652 qemu_mutex_unlock_iothread();
1653 rcu_unregister_thread();
1654 return NULL;
1655 }
1656
1657 static void *qemu_whpx_cpu_thread_fn(void *arg)
1658 {
1659 CPUState *cpu = arg;
1660 int r;
1661
1662 rcu_register_thread();
1663
1664 qemu_mutex_lock_iothread();
1665 qemu_thread_get_self(cpu->thread);
1666 cpu->thread_id = qemu_get_thread_id();
1667 current_cpu = cpu;
1668
1669 r = whpx_init_vcpu(cpu);
1670 if (r < 0) {
1671 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1672 exit(1);
1673 }
1674
1675 /* signal CPU creation */
1676 cpu->created = true;
1677 qemu_cond_signal(&qemu_cpu_cond);
1678 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1679
1680 do {
1681 if (cpu_can_run(cpu)) {
1682 r = whpx_vcpu_exec(cpu);
1683 if (r == EXCP_DEBUG) {
1684 cpu_handle_guest_debug(cpu);
1685 }
1686 }
1687 while (cpu_thread_is_idle(cpu)) {
1688 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1689 }
1690 qemu_wait_io_event_common(cpu);
1691 } while (!cpu->unplug || cpu_can_run(cpu));
1692
1693 whpx_destroy_vcpu(cpu);
1694 cpu->created = false;
1695 qemu_cond_signal(&qemu_cpu_cond);
1696 qemu_mutex_unlock_iothread();
1697 rcu_unregister_thread();
1698 return NULL;
1699 }
1700
1701 #ifdef _WIN32
1702 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1703 {
1704 }
1705 #endif
1706
1707 /* Multi-threaded TCG
1708 *
1709 * In the multi-threaded case each vCPU has its own thread. The TLS
1710 * variable current_cpu can be used deep in the code to find the
1711 * current CPUState for a given thread.
1712 */
1713
1714 static void *qemu_tcg_cpu_thread_fn(void *arg)
1715 {
1716 CPUState *cpu = arg;
1717
1718 assert(tcg_enabled());
1719 g_assert(!use_icount);
1720
1721 rcu_register_thread();
1722 tcg_register_thread();
1723
1724 qemu_mutex_lock_iothread();
1725 qemu_thread_get_self(cpu->thread);
1726
1727 cpu->thread_id = qemu_get_thread_id();
1728 cpu->created = true;
1729 cpu->can_do_io = 1;
1730 current_cpu = cpu;
1731 qemu_cond_signal(&qemu_cpu_cond);
1732 qemu_guest_random_seed_thread_part2(cpu->random_seed);
1733
1734 /* process any pending work */
1735 cpu->exit_request = 1;
1736
1737 do {
1738 if (cpu_can_run(cpu)) {
1739 int r;
1740 qemu_mutex_unlock_iothread();
1741 r = tcg_cpu_exec(cpu);
1742 qemu_mutex_lock_iothread();
1743 switch (r) {
1744 case EXCP_DEBUG:
1745 cpu_handle_guest_debug(cpu);
1746 break;
1747 case EXCP_HALTED:
1748 /* During start-up the vCPU is reset and the thread is
1749 * kicked several times. If we don't ensure we go back
1750 * to sleep in the halted state, we won't cleanly
1751 * start up when the vCPU is enabled.
1752 *
1753 * cpu->halted should ensure we sleep in wait_io_event
1754 */
1755 g_assert(cpu->halted);
1756 break;
1757 case EXCP_ATOMIC:
1758 qemu_mutex_unlock_iothread();
1759 cpu_exec_step_atomic(cpu);
1760 qemu_mutex_lock_iothread();
1761 default:
1762 /* Ignore everything else? */
1763 break;
1764 }
1765 }
1766
1767 atomic_mb_set(&cpu->exit_request, 0);
1768 qemu_wait_io_event(cpu);
1769 } while (!cpu->unplug || cpu_can_run(cpu));
1770
1771 qemu_tcg_destroy_vcpu(cpu);
1772 cpu->created = false;
1773 qemu_cond_signal(&qemu_cpu_cond);
1774 qemu_mutex_unlock_iothread();
1775 rcu_unregister_thread();
1776 return NULL;
1777 }
1778
1779 static void qemu_cpu_kick_thread(CPUState *cpu)
1780 {
1781 #ifndef _WIN32
1782 int err;
1783
1784 if (cpu->thread_kicked) {
1785 return;
1786 }
1787 cpu->thread_kicked = true;
1788 err = pthread_kill(cpu->thread->thread, SIG_IPI);
1789 if (err && err != ESRCH) {
1790 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1791 exit(1);
1792 }
1793 #else /* _WIN32 */
1794 if (!qemu_cpu_is_self(cpu)) {
1795 if (whpx_enabled()) {
1796 whpx_vcpu_kick(cpu);
1797 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1798 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1799 __func__, GetLastError());
1800 exit(1);
1801 }
1802 }
1803 #endif
1804 }
1805
1806 void qemu_cpu_kick(CPUState *cpu)
1807 {
1808 qemu_cond_broadcast(cpu->halt_cond);
1809 if (tcg_enabled()) {
1810 if (qemu_tcg_mttcg_enabled()) {
1811 cpu_exit(cpu);
1812 } else {
1813 qemu_cpu_kick_rr_cpus();
1814 }
1815 } else {
1816 if (hax_enabled()) {
1817 /*
1818 * FIXME: race condition with the exit_request check in
1819 * hax_vcpu_hax_exec
1820 */
1821 cpu->exit_request = 1;
1822 }
1823 qemu_cpu_kick_thread(cpu);
1824 }
1825 }
1826
1827 void qemu_cpu_kick_self(void)
1828 {
1829 assert(current_cpu);
1830 qemu_cpu_kick_thread(current_cpu);
1831 }
1832
1833 bool qemu_cpu_is_self(CPUState *cpu)
1834 {
1835 return qemu_thread_is_self(cpu->thread);
1836 }
1837
1838 bool qemu_in_vcpu_thread(void)
1839 {
1840 return current_cpu && qemu_cpu_is_self(current_cpu);
1841 }
1842
1843 static __thread bool iothread_locked = false;
1844
1845 bool qemu_mutex_iothread_locked(void)
1846 {
1847 return iothread_locked;
1848 }
1849
1850 /*
1851 * The BQL is taken from so many places that it is worth profiling the
1852 * callers directly, instead of funneling them all through a single function.
1853 */
1854 void qemu_mutex_lock_iothread_impl(const char *file, int line)
1855 {
1856 QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
1857
1858 g_assert(!qemu_mutex_iothread_locked());
1859 bql_lock(&qemu_global_mutex, file, line);
1860 iothread_locked = true;
1861 }
1862
1863 void qemu_mutex_unlock_iothread(void)
1864 {
1865 g_assert(qemu_mutex_iothread_locked());
1866 iothread_locked = false;
1867 qemu_mutex_unlock(&qemu_global_mutex);
1868 }
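/*
 * Hypothetical usage sketch (illustrative only): code running outside a
 * vCPU thread takes the BQL before touching device or global state and
 * releases it afterwards; vCPU threads, as seen in the thread functions
 * above, drop it around the actual guest execution call.
 */
#if 0
static void example_touch_device_state(void)
{
    qemu_mutex_lock_iothread();
    /* ... access devices, timers or other BQL-protected state ... */
    qemu_mutex_unlock_iothread();
}
#endif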
1869
1870 void qemu_cond_wait_iothread(QemuCond *cond)
1871 {
1872 qemu_cond_wait(cond, &qemu_global_mutex);
1873 }
1874
1875 static bool all_vcpus_paused(void)
1876 {
1877 CPUState *cpu;
1878
1879 CPU_FOREACH(cpu) {
1880 if (!cpu->stopped) {
1881 return false;
1882 }
1883 }
1884
1885 return true;
1886 }
1887
1888 void pause_all_vcpus(void)
1889 {
1890 CPUState *cpu;
1891
1892 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1893 CPU_FOREACH(cpu) {
1894 if (qemu_cpu_is_self(cpu)) {
1895 qemu_cpu_stop(cpu, true);
1896 } else {
1897 cpu->stop = true;
1898 qemu_cpu_kick(cpu);
1899 }
1900 }
1901
1902 /* We need to drop the replay_lock so any vCPU threads woken up
1903 * can finish their replay tasks
1904 */
1905 replay_mutex_unlock();
1906
1907 while (!all_vcpus_paused()) {
1908 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1909 CPU_FOREACH(cpu) {
1910 qemu_cpu_kick(cpu);
1911 }
1912 }
1913
1914 qemu_mutex_unlock_iothread();
1915 replay_mutex_lock();
1916 qemu_mutex_lock_iothread();
1917 }
1918
1919 void cpu_resume(CPUState *cpu)
1920 {
1921 cpu->stop = false;
1922 cpu->stopped = false;
1923 qemu_cpu_kick(cpu);
1924 }
1925
1926 void resume_all_vcpus(void)
1927 {
1928 CPUState *cpu;
1929
1930 if (!runstate_is_running()) {
1931 return;
1932 }
1933
1934 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1935 CPU_FOREACH(cpu) {
1936 cpu_resume(cpu);
1937 }
1938 }
1939
1940 void cpu_remove_sync(CPUState *cpu)
1941 {
1942 cpu->stop = true;
1943 cpu->unplug = true;
1944 qemu_cpu_kick(cpu);
1945 qemu_mutex_unlock_iothread();
1946 qemu_thread_join(cpu->thread);
1947 qemu_mutex_lock_iothread();
1948 }
1949
1950 /* Size of temporary buffers used to form vCPU thread names */
1951 #define VCPU_THREAD_NAME_SIZE 16
1952
1953 static void qemu_tcg_init_vcpu(CPUState *cpu)
1954 {
1955 char thread_name[VCPU_THREAD_NAME_SIZE];
1956 static QemuCond *single_tcg_halt_cond;
1957 static QemuThread *single_tcg_cpu_thread;
1958 static int tcg_region_inited;
1959
1960 assert(tcg_enabled());
1961 /*
1962 * Initialize TCG regions--once. Now is a good time, because:
1963 * (1) TCG's init context, prologue and target globals have been set up.
1964 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1965 * -accel flag is processed, so the check doesn't work then).
1966 */
1967 if (!tcg_region_inited) {
1968 tcg_region_inited = 1;
1969 tcg_region_init();
1970 }
1971
1972 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1973 cpu->thread = g_malloc0(sizeof(QemuThread));
1974 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1975 qemu_cond_init(cpu->halt_cond);
1976
1977 if (qemu_tcg_mttcg_enabled()) {
1978 /* create a thread per vCPU with TCG (MTTCG) */
1979 parallel_cpus = true;
1980 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1981 cpu->cpu_index);
1982
1983 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1984 cpu, QEMU_THREAD_JOINABLE);
1985
1986 } else {
1987 /* share a single thread for all cpus with TCG */
1988 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1989 qemu_thread_create(cpu->thread, thread_name,
1990 qemu_tcg_rr_cpu_thread_fn,
1991 cpu, QEMU_THREAD_JOINABLE);
1992
1993 single_tcg_halt_cond = cpu->halt_cond;
1994 single_tcg_cpu_thread = cpu->thread;
1995 }
1996 #ifdef _WIN32
1997 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1998 #endif
1999 } else {
2000 /* For non-MTTCG cases we share the thread */
2001 cpu->thread = single_tcg_cpu_thread;
2002 cpu->halt_cond = single_tcg_halt_cond;
2003 cpu->thread_id = first_cpu->thread_id;
2004 cpu->can_do_io = 1;
2005 cpu->created = true;
2006 }
2007 }
2008
2009 static void qemu_hax_start_vcpu(CPUState *cpu)
2010 {
2011 char thread_name[VCPU_THREAD_NAME_SIZE];
2012
2013 cpu->thread = g_malloc0(sizeof(QemuThread));
2014 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2015 qemu_cond_init(cpu->halt_cond);
2016
2017 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
2018 cpu->cpu_index);
2019 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
2020 cpu, QEMU_THREAD_JOINABLE);
2021 #ifdef _WIN32
2022 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2023 #endif
2024 }
2025
2026 static void qemu_kvm_start_vcpu(CPUState *cpu)
2027 {
2028 char thread_name[VCPU_THREAD_NAME_SIZE];
2029
2030 cpu->thread = g_malloc0(sizeof(QemuThread));
2031 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2032 qemu_cond_init(cpu->halt_cond);
2033 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
2034 cpu->cpu_index);
2035 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
2036 cpu, QEMU_THREAD_JOINABLE);
2037 }
2038
2039 static void qemu_hvf_start_vcpu(CPUState *cpu)
2040 {
2041 char thread_name[VCPU_THREAD_NAME_SIZE];
2042
2043 /* HVF currently does not support TCG, and only runs in
2044 * unrestricted-guest mode. */
2045 assert(hvf_enabled());
2046
2047 cpu->thread = g_malloc0(sizeof(QemuThread));
2048 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2049 qemu_cond_init(cpu->halt_cond);
2050
2051 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
2052 cpu->cpu_index);
2053 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
2054 cpu, QEMU_THREAD_JOINABLE);
2055 }
2056
2057 static void qemu_whpx_start_vcpu(CPUState *cpu)
2058 {
2059 char thread_name[VCPU_THREAD_NAME_SIZE];
2060
2061 cpu->thread = g_malloc0(sizeof(QemuThread));
2062 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2063 qemu_cond_init(cpu->halt_cond);
2064 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2065 cpu->cpu_index);
2066 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2067 cpu, QEMU_THREAD_JOINABLE);
2068 #ifdef _WIN32
2069 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2070 #endif
2071 }
2072
2073 static void qemu_dummy_start_vcpu(CPUState *cpu)
2074 {
2075 char thread_name[VCPU_THREAD_NAME_SIZE];
2076
2077 cpu->thread = g_malloc0(sizeof(QemuThread));
2078 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2079 qemu_cond_init(cpu->halt_cond);
2080 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2081 cpu->cpu_index);
2082 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
2083 QEMU_THREAD_JOINABLE);
2084 }
2085
2086 void qemu_init_vcpu(CPUState *cpu)
2087 {
2088 MachineState *ms = MACHINE(qdev_get_machine());
2089
2090 cpu->nr_cores = ms->smp.cores;
2091 cpu->nr_threads = ms->smp.threads;
2092 cpu->stopped = true;
2093 cpu->random_seed = qemu_guest_random_seed_thread_part1();
2094
2095 if (!cpu->as) {
2096 /* If the target cpu hasn't set up any address spaces itself,
2097 * give it the default one.
2098 */
2099 cpu->num_ases = 1;
2100 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
2101 }
2102
2103 if (kvm_enabled()) {
2104 qemu_kvm_start_vcpu(cpu);
2105 } else if (hax_enabled()) {
2106 qemu_hax_start_vcpu(cpu);
2107 } else if (hvf_enabled()) {
2108 qemu_hvf_start_vcpu(cpu);
2109 } else if (tcg_enabled()) {
2110 qemu_tcg_init_vcpu(cpu);
2111 } else if (whpx_enabled()) {
2112 qemu_whpx_start_vcpu(cpu);
2113 } else {
2114 qemu_dummy_start_vcpu(cpu);
2115 }
2116
2117 while (!cpu->created) {
2118 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2119 }
2120 }
2121
2122 void cpu_stop_current(void)
2123 {
2124 if (current_cpu) {
2125 current_cpu->stop = true;
2126 cpu_exit(current_cpu);
2127 }
2128 }
2129
2130 int vm_stop(RunState state)
2131 {
2132 if (qemu_in_vcpu_thread()) {
2133 qemu_system_vmstop_request_prepare();
2134 qemu_system_vmstop_request(state);
2135 /*
2136 * FIXME: should not return to device code in case
2137 * vm_stop() has been requested.
2138 */
2139 cpu_stop_current();
2140 return 0;
2141 }
2142
2143 return do_vm_stop(state, true);
2144 }
2145
2146 /**
2147 * Prepare for (re)starting the VM.
2148 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2149 * running or in case of an error condition), 0 otherwise.
2150 */
2151 int vm_prepare_start(void)
2152 {
2153 RunState requested;
2154
2155 qemu_vmstop_requested(&requested);
2156 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2157 return -1;
2158 }
2159
2160 /* Ensure that a STOP/RESUME pair of events is emitted if a
2161 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2162 * example, is documented to always be followed by
2163 * the STOP event.
2164 */
2165 if (runstate_is_running()) {
2166 qapi_event_send_stop();
2167 qapi_event_send_resume();
2168 return -1;
2169 }
2170
2171 /* We are sending this now, but the CPUs will be resumed shortly */
2172 qapi_event_send_resume();
2173
2174 cpu_enable_ticks();
2175 runstate_set(RUN_STATE_RUNNING);
2176 vm_state_notify(1, RUN_STATE_RUNNING);
2177 return 0;
2178 }
2179
2180 void vm_start(void)
2181 {
2182 if (!vm_prepare_start()) {
2183 resume_all_vcpus();
2184 }
2185 }
2186
2187 /* Does a state transition even if the VM is already stopped;
2188 the current state is forgotten forever. */
2189 int vm_stop_force_state(RunState state)
2190 {
2191 if (runstate_is_running()) {
2192 return vm_stop(state);
2193 } else {
2194 runstate_set(state);
2195
2196 bdrv_drain_all();
2197 /* Make sure to return an error if the flush in a previous vm_stop()
2198 * failed. */
2199 return bdrv_flush_all();
2200 }
2201 }
2202
2203 void list_cpus(const char *optarg)
2204 {
2205 /* XXX: implement xxx_cpu_list for targets that still lack it */
2206 #if defined(cpu_list)
2207 cpu_list();
2208 #endif
2209 }
2210
2211 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2212 bool has_cpu, int64_t cpu_index, Error **errp)
2213 {
2214 FILE *f;
2215 uint32_t l;
2216 CPUState *cpu;
2217 uint8_t buf[1024];
2218 int64_t orig_addr = addr, orig_size = size;
2219
2220 if (!has_cpu) {
2221 cpu_index = 0;
2222 }
2223
2224 cpu = qemu_get_cpu(cpu_index);
2225 if (cpu == NULL) {
2226 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2227 "a CPU number");
2228 return;
2229 }
2230
2231 f = fopen(filename, "wb");
2232 if (!f) {
2233 error_setg_file_open(errp, errno, filename);
2234 return;
2235 }
2236
2237 while (size != 0) {
2238 l = sizeof(buf);
2239 if (l > size)
2240 l = size;
2241 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2242 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2243 " specified", orig_addr, orig_size);
2244 goto exit;
2245 }
2246 if (fwrite(buf, 1, l, f) != l) {
2247 error_setg(errp, QERR_IO_ERROR);
2248 goto exit;
2249 }
2250 addr += l;
2251 size -= l;
2252 }
2253
2254 exit:
2255 fclose(f);
2256 }
2257
2258 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2259 Error **errp)
2260 {
2261 FILE *f;
2262 uint32_t l;
2263 uint8_t buf[1024];
2264
2265 f = fopen(filename, "wb");
2266 if (!f) {
2267 error_setg_file_open(errp, errno, filename);
2268 return;
2269 }
2270
2271 while (size != 0) {
2272 l = sizeof(buf);
2273 if (l > size)
2274 l = size;
2275 cpu_physical_memory_read(addr, buf, l);
2276 if (fwrite(buf, 1, l, f) != l) {
2277 error_setg(errp, QERR_IO_ERROR);
2278 goto exit;
2279 }
2280 addr += l;
2281 size -= l;
2282 }
2283
2284 exit:
2285 fclose(f);
2286 }
2287
2288 void qmp_inject_nmi(Error **errp)
2289 {
2290 nmi_monitor_handle(monitor_get_cpu_index(), errp);
2291 }
2292
2293 void dump_drift_info(void)
2294 {
2295 if (!use_icount) {
2296 return;
2297 }
2298
2299 qemu_printf("Host - Guest clock %"PRIi64" ms\n",
2300 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2301 if (icount_align_option) {
2302 qemu_printf("Max guest delay %"PRIi64" ms\n",
2303 -max_delay / SCALE_MS);
2304 qemu_printf("Max guest advance %"PRIi64" ms\n",
2305 max_advance / SCALE_MS);
2306 } else {
2307 qemu_printf("Max guest delay NA\n");
2308 qemu_printf("Max guest advance NA\n");
2309 }
2310 }